import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import seaborn as sns
import scipy as sp
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_predict
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.arima.model import ARIMAResults
from scipy.signal import detrend
# Load Data
# Read the monthly baggage-complaint records from the CSV export.
data_path = '/content/BaggageComplaints.csv'
bag_dat = pd.read_csv(data_path)
# Show the column dtypes / non-null counts, then the frame itself
# (pandas prints the first and last rows of a long frame, so no separate
# head() preview is needed).
bag_dat.info()
print(bag_dat)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 8 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Airline 252 non-null object
1 Date 252 non-null object
2 Month 252 non-null int64
3 Year 252 non-null int64
4 Baggage 252 non-null int64
5 Scheduled 252 non-null int64
6 Cancelled 252 non-null int64
7 Enplaned 252 non-null int64
dtypes: int64(6), object(2)
memory usage: 15.9+ KB
Airline Date Month Year Baggage Scheduled Cancelled \
0 American Eagle 01/2004 1 2004 12502 38276 2481
1 American Eagle 02/2004 2 2004 8977 35762 886
2 American Eagle 03/2004 3 2004 10289 39445 1346
3 American Eagle 04/2004 4 2004 8095 38982 755
4 American Eagle 05/2004 5 2004 10618 40422 2206
.. ... ... ... ... ... ... ...
247 United 08/2010 8 2010 14099 30637 344
248 United 09/2010 9 2010 9435 28072 161
249 United 10/2010 10 2010 9565 29144 140
250 United 11/2010 11 2010 8597 27318 104
251 United 12/2010 12 2010 14415 27619 599
Enplaned
0 992360
1 1060618
2 1227469
3 1234451
4 1267581
.. ...
247 4263211
248 3679517
249 3952549
250 3573268
251 3493643
[252 rows x 8 columns]
# Keep only the numeric measures: the airline label and the calendar columns
# are left out of the correlation analysis.
bag_dat_sub = bag_dat.drop(['Airline', 'Date', 'Month', 'Year'], axis=1)
# Pairwise correlations, rounded to 2 decimal places so the annotations stay readable
cormat = bag_dat_sub.corr().round(2)
# Draw the correlation matrix as an annotated heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(cormat, annot=True, cmap='coolwarm')
plt.show()
# Restrict the data to American Eagle. Take an explicit copy: the subset is
# modified later (its Date column is reassigned), and assigning into a
# filtered slice of bag_dat raises pandas' SettingWithCopyWarning.
bag_dat_American_Eagle = bag_dat[bag_dat['Airline'] == 'American Eagle'].copy()
bag_dat_American_Eagle.head(5)
| Airline | Date | Month | Year | Baggage | Scheduled | Cancelled | Enplaned | |
|---|---|---|---|---|---|---|---|---|
| 0 | American Eagle | 01/2004 | 1 | 2004 | 12502 | 38276 | 2481 | 992360 |
| 1 | American Eagle | 02/2004 | 2 | 2004 | 8977 | 35762 | 886 | 1060618 |
| 2 | American Eagle | 03/2004 | 3 | 2004 | 10289 | 39445 | 1346 | 1227469 |
| 3 | American Eagle | 04/2004 | 4 | 2004 | 8095 | 38982 | 755 | 1234451 |
| 4 | American Eagle | 05/2004 | 5 | 2004 | 10618 | 40422 | 2206 | 1267581 |
# Same correlation analysis as for the full data set, restricted to American Eagle:
# drop the airline label and the calendar columns first.
bag_dat_sub_American_Eagle = bag_dat_American_Eagle.drop(['Airline', 'Date', 'Month', 'Year'], axis=1)
# Pairwise correlations, rounded to 2 decimal places
cormat = bag_dat_sub_American_Eagle.corr().round(2)
# Draw the correlation matrix as an annotated heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(cormat, annot=True, cmap='coolwarm')
plt.show()
# Convert date column to a date class variable.
# assign() returns a new DataFrame instead of writing into the filtered
# subset in place, so pandas does not raise SettingWithCopyWarning here.
bag_dat_American_Eagle = bag_dat_American_Eagle.assign(
    Date=pd.to_datetime(bag_dat_American_Eagle['Date'], format="%m/%Y")
)
# Create a monthly index for the series ('MS' = month-start frequency)
bag_ts_American_Eagle = pd.Series(
    bag_dat_American_Eagle['Baggage'].values,
    index=bag_dat_American_Eagle['Date'],
)
bag_ts_American_Eagle.index.freq = 'MS'
#Plot the series
plt.figure(figsize=(10, 6))
bag_ts_American_Eagle.plot()
plt.title("Baggage Claims for American Eagle Airlines")
plt.xlabel("Date")
plt.ylabel("# Baggage Claims")
plt.show()
<ipython-input-1216-c058082e6f6e>:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy bag_dat_American_Eagle['Date'] = pd.to_datetime(bag_dat_American_Eagle['Date'], format="%m/%Y")
# Ljung-Box test on the American Eagle baggage series using 10 lags
# (tests whether the raw series is autocorrelated)
Bags_American_Eagle = bag_dat_American_Eagle[['Baggage']]
acorr_ljungbox(Bags_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 363.970723 | 4.308185e-72 |
# ACF (top panel) and PACF (bottom panel) of the American Eagle baggage
# series; zero=False leaves the lag-0 value out of both plots.
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(bag_ts_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(bag_ts_American_Eagle, zero=False, ax=ax2_American_Eagle)
plt.show()
For seasonal time series, the first step in the process is taking a seasonal difference. To do this, pass the `seasonal_order=(P, D, Q, s)` argument when you create the ARIMA model. P is the number of seasonal AR terms, D is the number of seasonal differences, Q is the number of seasonal MA terms, and s is the length of the seasonal period (for example, s=12 if it takes 12 months to complete the seasonal cycle). To take a seasonal difference, set D=1.
# Seasonal-difference-only model, ARIMA(0,0,0)x(0,1,0)[12]:
# no AR/MA terms, just the 12-month difference yt - y(t-12).
fit_ARD12_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(0, 0, 0),
    seasonal_order=(0, 1, 0, 12),
).fit()
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
# ACF/PACF of the residuals show how much correlation is still left in the
# data after the seasonal difference (PACF limited to 9 lags, lag 0 omitted).
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals12_American_Eagle, zero=False, lags=9, ax=ax2_American_Eagle)
plt.show()
# Candidate ARIMA(1,0,0)x(0,1,1)[12]: 12-month seasonal difference plus a
# non-seasonal AR(1) term and a seasonal MA(1) term.
fit_ARD12_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 0),
    seasonal_order=(0, 1, 1, 12),
).fit()
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
# ACF/PACF of this model's residuals show the correlation still remaining
# in the data (PACF limited to 9 lags, lag 0 omitted).
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals12_American_Eagle, zero=False, lags=9, ax=ax2_American_Eagle)
plt.show()
# Candidate ARIMA(1,1,0)x(0,1,1)[12]: one regular difference and one 12-month
# seasonal difference, with a non-seasonal AR(1) term and a seasonal MA(1) term.
fit_ARD12_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 1, 0),
    seasonal_order=(0, 1, 1, 12),
).fit()
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
# ACF/PACF of this model's residuals show the correlation still remaining
# in the data (PACF limited to 9 lags, lag 0 omitted).
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals12_American_Eagle, zero=False, lags=9, ax=ax2_American_Eagle)
plt.show()
# Candidate ARIMA(1,0,1)x(1,1,1)[12]: 12-month seasonal difference with
# non-seasonal AR(1) and MA(1) terms and seasonal AR(1) and MA(1) terms.
fit_ARD12_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 1),
    seasonal_order=(1, 1, 1, 12),
).fit()
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
# ACF/PACF of this model's residuals show the correlation still remaining
# in the data (PACF limited to 9 lags, lag 0 omitted).
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals12_American_Eagle, zero=False, lags=9, ax=ax2_American_Eagle)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
# Candidate ARIMA(2,0,1)x(0,1,1)[12]: 12-month seasonal difference with
# non-seasonal AR(2) and MA(1) terms and a seasonal MA(1) term.
fit_ARD12_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(2, 0, 1),
    seasonal_order=(0, 1, 1, 12),
).fit()
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
# ACF/PACF of this model's residuals show the correlation still remaining
# in the data (PACF limited to 9 lags, lag 0 omitted).
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals12_American_Eagle, zero=False, lags=9, ax=ax2_American_Eagle)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
# Candidate ARIMA(1,0,2)x(0,1,1)[12]: 12-month seasonal difference with
# non-seasonal AR(1) and MA(2) terms and a seasonal MA(1) term.
fit_ARD12_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 2),
    seasonal_order=(0, 1, 1, 12),
).fit()
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
# ACF/PACF of this model's residuals show the correlation still remaining
# in the data (PACF limited to 9 lags, lag 0 omitted).
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals12_American_Eagle, zero=False, lags=9, ax=ax2_American_Eagle)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
# Candidate ARIMA(2,0,2)x(0,1,1)[12]: 12-month seasonal difference with
# non-seasonal AR(2) and MA(2) terms and a seasonal MA(1) term.
fit_ARD12_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(2, 0, 2),
    seasonal_order=(0, 1, 1, 12),
).fit()
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
# ACF/PACF of this model's residuals show the correlation still remaining
# in the data (PACF limited to 9 lags, lag 0 omitted).
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals12_American_Eagle, zero=False, lags=9, ax=ax2_American_Eagle)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
# Candidate ARIMA(1,0,1)x(0,1,2)[12]: 12-month seasonal difference with
# non-seasonal AR(1) and MA(1) terms and two seasonal MA terms.
fit_ARD12_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 1),
    seasonal_order=(0, 1, 2, 12),
).fit()
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
# ACF/PACF of this model's residuals show the correlation still remaining
# in the data (PACF limited to 9 lags, lag 0 omitted).
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals12_American_Eagle, zero=False, lags=9, ax=ax2_American_Eagle)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
# Candidate ARIMA(1,0,1)x(0,1,1)[12]: 12-month seasonal difference with
# non-seasonal AR(1) and MA(1) terms and a seasonal MA(1) term.
fit_ARD12_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 1),
    seasonal_order=(0, 1, 1, 12),
).fit()
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
# ACF/PACF of this model's residuals show the correlation still remaining
# in the data (PACF limited to 9 lags, lag 0 omitted).
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals12_American_Eagle, zero=False, lags=9, ax=ax2_American_Eagle)
plt.show()
Notice that the ACF and PACF for the residuals look like a typical autoregressive signature, so the next step might be to add p = 1 to include an AR(1) term in the model. This will not be sufficient to get the residuals to white noise. Recall that an MA term can help to smooth out a model after differencing. In this case we took a seasonal difference, so we'll include a seasonal MA term by setting Q=1 in the seasonal order.
# Fit ARIMA(1,0,1)x(1,1,1)[12] to the American Eagle baggage series
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 1),
    seasonal_order=(1, 1, 1, 12),
).fit()
# Coefficient table and fit statistics for this model
print(fit_AR1_American_Eagle.summary())
# Residuals over time, to check for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Predicted values for the series (get_prediction with its default range);
# the print only shows the results-wrapper object, not the values themselves.
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 1)x(1, 1, 1, 12) Log Likelihood -665.168
Date: Mon, 23 Oct 2023 AIC 1340.336
Time: 02:44:22 BIC 1351.719
Sample: 01-01-2004 HQIC 1344.868
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.9713 0.027 36.332 0.000 0.919 1.024
ma.L1 -0.3991 0.094 -4.227 0.000 -0.584 -0.214
ar.S.L12 0.1188 0.126 0.940 0.347 -0.129 0.367
ma.S.L12 -0.9886 0.159 -6.217 0.000 -1.300 -0.677
sigma2 4.778e+06 3.42e-08 1.4e+14 0.000 4.78e+06 4.78e+06
===================================================================================
Ljung-Box (L1) (Q): 3.28 Jarque-Bera (JB): 1.83
Prob(Q): 0.07 Prob(JB): 0.40
Heteroskedasticity (H): 0.22 Skew: 0.16
Prob(H) (two-sided): 0.00 Kurtosis: 3.71
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 2.29e+30. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1fa07f0>
# Gather the point predictions and their confidence limits into one table
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()
prediction_df_American_Eagle = pd.DataFrame(
    {
        'predicted_mean': predicted_mean_American_Eagle,
        'lower_bound': conf_int_American_Eagle.iloc[:, 0],
        'upper_bound': conf_int_American_Eagle.iloc[:, 1]
    }
).iloc[1:]  # leave out the first row of the prediction table
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | |
|---|---|---|---|
| Date | |||
| 2004-02-01 | 9044.082011 | 2725.486659 | 15362.677364 |
| 2004-03-01 | 8121.529686 | 2039.178630 | 14203.880742 |
| 2004-04-01 | 8331.565687 | 2276.700644 | 14386.430729 |
| 2004-05-01 | 7268.907708 | 1214.894887 | 13322.920529 |
| 2004-06-01 | 8014.247678 | 1961.363000 | 14067.132357 |
# add back actuals --
# Left-join on the Date index so that exactly the rows that have a prediction
# are kept. This replaces an outer concat followed by a positional tail(-1),
# which relied on the unmatched first observation sorting to the top and
# would silently drop a valid row otherwise.
prediction_df_American_Eagle = prediction_df_American_Eagle.join(
    bag_ts_American_Eagle.to_frame(name='Baggage')
)
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | Baggage | |
|---|---|---|---|---|
| Date | ||||
| 2004-02-01 | 9044.082011 | 2725.486659 | 15362.677364 | 8977 |
| 2004-03-01 | 8121.529686 | 2039.178630 | 14203.880742 | 10289 |
| 2004-04-01 | 8331.565687 | 2276.700644 | 14386.430729 | 8095 |
| 2004-05-01 | 7268.907708 | 1214.894887 | 13322.920529 | 10618 |
| 2004-06-01 | 8014.247678 | 1961.363000 | 14067.132357 | 13684 |
# Forecast error: actual minus predicted
errors_American_Eagle = prediction_df_American_Eagle['Baggage'].sub(prediction_df_American_Eagle['predicted_mean'])
# Absolute value of the error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# MAPE: mean of |error| / |actual|
mape_American_Eagle = errors_abs_American_Eagle.div(prediction_df_American_Eagle['Baggage'].abs()).mean()
# RMSE: square root of the mean squared error
rmse_American_Eagle = errors_American_Eagle.pow(2).mean() ** 0.5
# Report both metrics
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 13.988% RMSE = 2,840.712
# Fit ARIMA(2,0,1)x(0,1,1)[12] to the American Eagle baggage series
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(2, 0, 1),
    seasonal_order=(0, 1, 1, 12),
).fit()
# Coefficient table and fit statistics for this model
print(fit_AR1_American_Eagle.summary())
# Residuals over time, to check for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Predicted values for the series (get_prediction with its default range);
# the print only shows the results-wrapper object, not the values themselves.
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
warn('Non-invertible starting MA parameters found.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 1)x(0, 1, 1, 12) Log Likelihood -666.623
Date: Mon, 23 Oct 2023 AIC 1343.246
Time: 02:44:25 BIC 1354.629
Sample: 01-01-2004 HQIC 1347.778
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.0264 0.109 -0.242 0.808 -0.240 0.187
ar.L2 0.9148 0.087 10.523 0.000 0.744 1.085
ma.L1 0.8751 0.114 7.651 0.000 0.651 1.099
ma.S.L12 -0.9898 0.159 -6.220 0.000 -1.302 -0.678
sigma2 4.643e+06 3.51e-08 1.32e+14 0.000 4.64e+06 4.64e+06
===================================================================================
Ljung-Box (L1) (Q): 0.13 Jarque-Bera (JB): 3.35
Prob(Q): 0.72 Prob(JB): 0.19
Heteroskedasticity (H): 0.22 Skew: -0.16
Prob(H) (two-sided): 0.00 Kurtosis: 4.01
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 8.05e+29. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3b660b0>
# Fit ARIMA(1,0,2)x(0,1,1)[12] to the American Eagle baggage series
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 2),
    seasonal_order=(0, 1, 1, 12),
).fit()
# Coefficient table and fit statistics for this model
print(fit_AR1_American_Eagle.summary())
# Residuals over time, to check for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Predicted values for the series (get_prediction with its default range);
# the print only shows the results-wrapper object, not the values themselves.
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 2)x(0, 1, [1], 12) Log Likelihood -664.733
Date: Mon, 23 Oct 2023 AIC 1339.466
Time: 02:44:30 BIC 1350.849
Sample: 01-01-2004 HQIC 1343.998
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.9832 0.021 46.902 0.000 0.942 1.024
ma.L1 -0.3366 0.094 -3.571 0.000 -0.521 -0.152
ma.L2 -0.1252 0.117 -1.074 0.283 -0.354 0.103
ma.S.L12 -0.9926 0.154 -6.450 0.000 -1.294 -0.691
sigma2 4.568e+06 3.42e-08 1.34e+14 0.000 4.57e+06 4.57e+06
===================================================================================
Ljung-Box (L1) (Q): 1.21 Jarque-Bera (JB): 2.30
Prob(Q): 0.27 Prob(JB): 0.32
Heteroskedasticity (H): 0.24 Skew: 0.28
Prob(H) (two-sided): 0.00 Kurtosis: 3.68
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 5.9e+28. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3b66800>
# Fit ARIMA(2,0,2)x(0,1,1)[12] to the American Eagle baggage series
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(2, 0, 2),
    seasonal_order=(0, 1, 1, 12),
).fit()
# Coefficient table and fit statistics for this model
print(fit_AR1_American_Eagle.summary())
# Residuals over time, to check for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Predicted values for the series (get_prediction with its default range);
# the print only shows the results-wrapper object, not the values themselves.
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 2)x(0, 1, [1], 12) Log Likelihood -665.093
Date: Mon, 23 Oct 2023 AIC 1342.187
Time: 02:44:34 BIC 1355.847
Sample: 01-01-2004 HQIC 1347.625
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 1.1811 1.039 1.136 0.256 -0.856 3.218
ar.L2 -0.1841 1.032 -0.178 0.858 -2.208 1.839
ma.L1 -0.5189 1.020 -0.509 0.611 -2.519 1.481
ma.L2 -0.0614 0.456 -0.135 0.893 -0.955 0.832
ma.S.L12 -0.9775 0.172 -5.690 0.000 -1.314 -0.641
sigma2 4.767e+06 7.25e-08 6.57e+13 0.000 4.77e+06 4.77e+06
===================================================================================
Ljung-Box (L1) (Q): 1.08 Jarque-Bera (JB): 1.77
Prob(Q): 0.30 Prob(JB): 0.41
Heteroskedasticity (H): 0.25 Skew: 0.20
Prob(H) (two-sided): 0.00 Kurtosis: 3.65
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 5.63e+30. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1b49600>
# Fit ARIMA(1,0,1)x(0,1,2)[12] to the American Eagle baggage series
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 1),
    seasonal_order=(0, 1, 2, 12),
).fit()
# Coefficient table and fit statistics for this model
print(fit_AR1_American_Eagle.summary())
# Residuals over time, to check for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Predicted values for the series (get_prediction with its default range);
# the print only shows the results-wrapper object, not the values themselves.
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
SARIMAX Results
=============================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 1)x(0, 1, [1, 2], 12) Log Likelihood -665.213
Date: Mon, 23 Oct 2023 AIC 1340.425
Time: 02:44:38 BIC 1351.809
Sample: 01-01-2004 HQIC 1344.957
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.9737 0.025 38.686 0.000 0.924 1.023
ma.L1 -0.4019 0.094 -4.257 0.000 -0.587 -0.217
ma.S.L12 -0.9000 0.167 -5.386 0.000 -1.228 -0.572
ma.S.L24 -0.0970 0.116 -0.836 0.403 -0.324 0.130
sigma2 4.722e+06 3.48e-08 1.36e+14 0.000 4.72e+06 4.72e+06
===================================================================================
Ljung-Box (L1) (Q): 3.17 Jarque-Bera (JB): 1.72
Prob(Q): 0.08 Prob(JB): 0.42
Heteroskedasticity (H): 0.23 Skew: 0.17
Prob(H) (two-sided): 0.00 Kurtosis: 3.68
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 2.55e+30. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a02abaf0>
# Fit ARIMA(1,0,0)x(0,1,1)[12] to the American Eagle baggage series
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 0),
    seasonal_order=(0, 1, 1, 12),
).fit()
# Coefficient table and fit statistics for this model
print(fit_AR1_American_Eagle.summary())
# Residuals over time, to check for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Predicted values for the series (get_prediction with its default range);
# the print only shows the results-wrapper object, not the values themselves.
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 0)x(0, 1, [1], 12) Log Likelihood -669.273
Date: Mon, 23 Oct 2023 AIC 1344.546
Time: 02:44:39 BIC 1351.376
Sample: 01-01-2004 HQIC 1347.265
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.9322 0.025 37.466 0.000 0.883 0.981
ma.S.L12 -0.8991 0.167 -5.397 0.000 -1.226 -0.573
sigma2 5.34e+06 3.34e-08 1.6e+14 0.000 5.34e+06 5.34e+06
===================================================================================
Ljung-Box (L1) (Q): 0.40 Jarque-Bera (JB): 5.16
Prob(Q): 0.53 Prob(JB): 0.08
Heteroskedasticity (H): 0.24 Skew: -0.21
Prob(H) (two-sided): 0.00 Kurtosis: 4.24
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 2.03e+29. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3c61750>
# Fit ARIMA(1,0,1)x(0,1,1)[12] to the American Eagle baggage series.
# The residuals and predictions from this fit are the ones examined in the
# cells that follow.
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 1),
    seasonal_order=(0, 1, 1, 12),
).fit()
# Coefficient table and fit statistics for this model
print(fit_AR1_American_Eagle.summary())
# Residuals over time, to check for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Predicted values for the series (get_prediction with its default range);
# the print only shows the results-wrapper object, not the values themselves.
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
print(pred_units_American_Eagle)
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 1)x(0, 1, 1, 12) Log Likelihood -665.483
Date: Mon, 23 Oct 2023 AIC 1338.966
Time: 02:44:40 BIC 1348.073
Sample: 01-01-2004 HQIC 1342.591
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.9790 0.022 45.408 0.000 0.937 1.021
ma.L1 -0.4075 0.096 -4.246 0.000 -0.596 -0.219
ma.S.L12 -0.9971 0.152 -6.539 0.000 -1.296 -0.698
sigma2 4.693e+06 3.27e-08 1.44e+14 0.000 4.69e+06 4.69e+06
===================================================================================
Ljung-Box (L1) (Q): 2.60 Jarque-Bera (JB): 1.88
Prob(Q): 0.11 Prob(JB): 0.39
Heteroskedasticity (H): 0.24 Skew: 0.27
Prob(H) (two-sided): 0.00 Kurtosis: 3.59
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 7.54e+28. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1addd20>
# ACF (top panel) and PACF (bottom panel) of the fitted model's residuals;
# lag 0 is omitted and the PACF is limited to 9 lags.
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals_American_Eagle, zero=False, lags=9, ax=ax2_American_Eagle)
plt.show()
After reviewing the ACF and PACF for the residuals from this model, and noting that the Ljung-Box test (below) does not reject the hypothesis that the residuals are white noise (p ≈ 0.16 at 10 lags), this seems to be a reasonable model for the data.
# Execute the Ljung-Box test on the model residuals at lag 10.
# return_df=True returns the statistic and p-value as a DataFrame.
sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 14.185104 | 0.164715 |
Convert predictions into a data frame in order to compute the forecast error
# Convert the in-sample predictions to a data frame:
# one column for the predicted mean and one for each confidence bound
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()
prediction_df_American_Eagle = pd.DataFrame({
'predicted_mean': predicted_mean_American_Eagle,
# conf_int() column 0 is the lower bound, column 1 the upper bound
'lower_bound': conf_int_American_Eagle.iloc[:, 0],
'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
# tail(-1) keeps every row except the first one
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | |
|---|---|---|---|
| Date | |||
| 2004-02-01 | 8877.523581 | 2308.238573 | 15446.808589 |
| 2004-03-01 | 8010.148141 | 1679.242038 | 14341.054243 |
| 2004-04-01 | 8220.446741 | 1916.472828 | 14524.420654 |
| 2004-05-01 | 7162.927350 | 859.526021 | 13466.328679 |
| 2004-06-01 | 7885.027953 | 1583.340640 | 14186.715265 |
Join actual values to predicted values
# Add the actual values back as a 'Baggage' column next to the predictions.
# concat(axis=1) aligns the two objects on their index.
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
# NOTE(review): presumably the actual series still carries the first month, so the
# concat re-introduces a leading row without predictions; tail(-1) drops it again -- confirm.
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | Baggage | |
|---|---|---|---|---|
| Date | ||||
| 2004-02-01 | 8877.523581 | 2308.238573 | 15446.808589 | 8977 |
| 2004-03-01 | 8010.148141 | 1679.242038 | 14341.054243 | 10289 |
| 2004-04-01 | 8220.446741 | 1916.472828 | 14524.420654 | 8095 |
| 2004-05-01 | 7162.927350 | 859.526021 | 13466.328679 | 10618 |
| 2004-06-01 | 7885.027953 | 1583.340640 | 14186.715265 | 13684 |
Calculate the Forecast Error
# Forecast error = actual minus predicted mean
actual_American_Eagle = prediction_df_American_Eagle['Baggage']
errors_American_Eagle = actual_American_Eagle.sub(prediction_df_American_Eagle['predicted_mean'])

# Magnitude of each error
errors_abs_American_Eagle = errors_American_Eagle.abs()

# MAPE: mean of |error| / |actual|
mape_American_Eagle = errors_abs_American_Eagle.div(actual_American_Eagle.abs()).mean()

# RMSE: square root of the mean squared error
rmse_American_Eagle = errors_American_Eagle.pow(2).mean() ** 0.5

# Report both metrics
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 14.014% RMSE = 2,880.493
Generate the forecast for the next six months
# Generate forecast for the next six months from the fitted model
forecast_American_Eagle = fit_AR1_American_Eagle.get_forecast(steps=6)
mean_forecast_American_Eagle = forecast_American_Eagle.predicted_mean
# conf_int() column 0 is the lower bound, column 1 the upper bound
confidence_intervals_American_Eagle = forecast_American_Eagle.conf_int()
# Create a DataFrame with forecasted values and confidence intervals.
# The point-forecast column is named after the series being modelled (baggage
# complaints); the earlier 'Forecasted_CPI' label was a leftover from another analysis.
forecast_df_American_Eagle = pd.DataFrame({
    'Forecasted_Baggage': mean_forecast_American_Eagle,
    'Lower_Bound': confidence_intervals_American_Eagle.iloc[:, 0],
    'Upper_Bound': confidence_intervals_American_Eagle.iloc[:, 1],
})
forecast_df_American_Eagle
| Forecasted_CPI | Lower_Bound | Upper_Bound | |
|---|---|---|---|
| 2011-01-01 | 10558.880550 | 6047.453083 | 15070.308016 |
| 2011-02-01 | 7716.470919 | 2537.663582 | 12895.278256 |
| 2011-03-01 | 9475.082087 | 3734.462212 | 15215.701962 |
| 2011-04-01 | 7834.568862 | 1607.491752 | 14061.645972 |
| 2011-05-01 | 8397.925840 | 1742.128457 | 15053.723223 |
| 2011-06-01 | 11098.022980 | 4059.863838 | 18136.182121 |
Plot the series and append the six-month forecast. Notice that the forecast follows a pattern similar to the one observed in the actual data.
# Draw the observed series followed by the six-month forecast
plt.figure(figsize=(14, 7))
bag_ts_American_Eagle.plot(label='Observed', legend=True)
mean_forecast_American_Eagle.plot(label='6-Months Forecast', legend=True)

# Shade the band between the lower and upper forecast bounds
band_lower_American_Eagle, band_upper_American_Eagle = (
    confidence_intervals_American_Eagle.iloc[:, 0],
    confidence_intervals_American_Eagle.iloc[:, 1],
)
plt.fill_between(
    confidence_intervals_American_Eagle.index,
    band_lower_American_Eagle,
    band_upper_American_Eagle,
    color='pink',
    alpha=0.3,
)

# Labels, layout and legend
plt.ylabel("# Baggage Claims")
plt.xlabel("Date")
plt.title("American Eagle Baggage Claims Forecast")
plt.xticks(rotation=45)
plt.tight_layout()
plt.legend()
plt.show()
# Keep only the United rows. The explicit .copy() makes this an independent frame,
# so assigning columns on it later does not raise SettingWithCopyWarning.
bag_dat_United = bag_dat[bag_dat['Airline'] == 'United'].copy()
bag_dat_United.head(5)
| Airline | Date | Month | Year | Baggage | Scheduled | Cancelled | Enplaned | |
|---|---|---|---|---|---|---|---|---|
| 168 | United | 01/2004 | 1 | 2004 | 25015 | 45809 | 1017 | 4434315 |
| 169 | United | 02/2004 | 2 | 2004 | 16660 | 42675 | 312 | 4458657 |
| 170 | United | 03/2004 | 3 | 2004 | 19318 | 46512 | 321 | 5302929 |
| 171 | United | 04/2004 | 4 | 2004 | 15638 | 45309 | 162 | 5288871 |
| 172 | United | 05/2004 | 5 | 2004 | 19302 | 46630 | 652 | 5408451 |
# Drop the identifier/date columns so only the numeric measures remain
bag_dat_sub_United = bag_dat_United.drop(columns=['Airline', 'Date', 'Month', 'Year'])

# Pairwise correlations, rounded to 2 decimal places
cormat = bag_dat_sub_United.corr().round(2)

# Annotated heatmap of the correlation matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cormat, annot=True, cmap='coolwarm')
plt.show()
# Convert date column to a date class variable
# Create a monthly index for the series
# The Date strings are parsed as month/year ("%m/%Y")
bag_dat_United['Date'] = pd.to_datetime(bag_dat_United['Date'], format="%m/%Y")
# Series of baggage counts indexed by the parsed dates
bag_ts_United = pd.Series(bag_dat_United['Baggage'].values, index=bag_dat_United['Date'])
# 'MS' = month-start frequency
bag_ts_United.index.freq = 'MS'
#Plot the series
plt.figure(figsize=(10, 6))
bag_ts_United.plot()
plt.title("Baggage Claims for United")
plt.xlabel("Date")
plt.ylabel("# Baggage Claims")
plt.show()
<ipython-input-1247-cb68bd3cd6fe>:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy bag_dat_United['Date'] = pd.to_datetime(bag_dat_United['Date'], format="%m/%Y")
# Execute the Ljung-Box test on the United baggage series using 10 lags
Bags_United = bag_dat_United[['Baggage']]
sm.stats.acorr_ljungbox(Bags_United, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 160.317171 | 2.786746e-29 |
# Plot the ACF and PACF for the United baggage series (lag 0 omitted)
fig, (ax1_United, ax2_United) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(bag_ts_United, ax=ax1_United, zero=False)
plot_pacf(bag_ts_United, ax=ax2_United, zero=False)
plt.show()
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
# which is yt - y(t-12)
fit_ARD12_United = ARIMA(bag_ts_United, order=(0,0,0), seasonal_order=(0, 1, 0, 12)).fit()
# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data
residuals12_United = fit_ARD12_United.resid
fig, (ax1_United, ax2_United) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_United, ax=ax1_United, zero=False)
# The PACF is restricted to the first 9 lags
plot_pacf(residuals12_United, ax=ax2_United, zero=False, lags=9)
plt.show()
# Fit ARIMA(1,0,0)x(0,1,1)[12] model to the data
#Best model for Baggage Complaints for United is ARIMA(1,0,0)x(0,1,1)[12]
fit_AR1_United = ARIMA(bag_ts_United, order=(1,0,0), seasonal_order=(0, 1, 1, 12)).fit()
# Summary of the ARIMA model
print(fit_AR1_United.summary())
# Examine residuals from the model
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
#Generate the predicted values for the series
pred_units_United = fit_AR1_United.get_prediction()
# Printing the results wrapper only shows its object repr, not the predicted values;
# the values are read from .predicted_mean / .conf_int() further down.
print(pred_units_United)
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 0)x(0, 1, [1], 12) Log Likelihood -706.585
Date: Mon, 23 Oct 2023 AIC 1419.170
Time: 02:44:45 BIC 1426.000
Sample: 01-01-2004 HQIC 1421.890
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.8258 0.035 23.635 0.000 0.757 0.894
ma.S.L12 -0.6496 0.116 -5.580 0.000 -0.878 -0.421
sigma2 1.813e+07 1.72e-09 1.05e+16 0.000 1.81e+07 1.81e+07
===================================================================================
Ljung-Box (L1) (Q): 0.48 Jarque-Bera (JB): 17.36
Prob(Q): 0.49 Prob(JB): 0.00
Heteroskedasticity (H): 0.30 Skew: -0.47
Prob(H) (two-sided): 0.00 Kurtosis: 5.21
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 3.51e+31. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a172e3e0>
# Plot ACF and PACF for residuals of the fitted United model
# Two stacked panels: ACF on top, PACF underneath. zero=False omits lag 0.
fig, (ax1_United, ax2_United) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals_United, ax=ax1_United, zero=False)
# The PACF is restricted to the first 9 lags
plot_pacf(residuals_United, ax=ax2_United, zero=False, lags=9)
plt.show()
# Execute the Ljung-Box test on the United model residuals at lag 10.
# return_df=True returns the statistic and p-value as a DataFrame.
sm.stats.acorr_ljungbox(residuals_United, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 16.951913 | 0.075435 |
# Convert the in-sample predictions to a data frame:
# one column for the predicted mean and one for each confidence bound
predicted_mean_United = pred_units_United.predicted_mean
conf_int_United = pred_units_United.conf_int()
prediction_df_United = pd.DataFrame({
'predicted_mean': predicted_mean_United,
# conf_int() column 0 is the lower bound, column 1 the upper bound
'lower_bound': conf_int_United.iloc[:, 0],
'upper_bound': conf_int_United.iloc[:, 1]
})
# tail(-1) keeps every row except the first one
prediction_df_United = prediction_df_United.tail(-1)
prediction_df_United.head()
| predicted_mean | lower_bound | upper_bound | |
|---|---|---|---|
| Date | |||
| 2004-02-01 | 19898.744014 | 9639.203017 | 30158.285011 |
| 2004-03-01 | 13278.758482 | 3019.242911 | 23538.274052 |
| 2004-04-01 | 15061.243589 | 4805.267095 | 25317.220083 |
| 2004-05-01 | 12035.958696 | 1785.483779 | 22286.433612 |
| 2004-06-01 | 14649.272687 | 4406.927829 | 24891.617545 |
# Add the actual values back as a 'Baggage' column next to the predictions.
# concat(axis=1) aligns the two objects on their index.
prediction_df_United = pd.concat([prediction_df_United,bag_ts_United.to_frame(name='Baggage')],axis=1)
# NOTE(review): the actual series still carries the first month, so the concat
# presumably re-introduces a leading row without predictions; tail(-1) drops it again -- confirm.
prediction_df_United = prediction_df_United.tail(-1)
prediction_df_United.head()
| predicted_mean | lower_bound | upper_bound | Baggage | |
|---|---|---|---|---|
| Date | ||||
| 2004-02-01 | 19898.744014 | 9639.203017 | 30158.285011 | 16660 |
| 2004-03-01 | 13278.758482 | 3019.242911 | 23538.274052 | 19318 |
| 2004-04-01 | 15061.243589 | 4805.267095 | 25317.220083 | 15638 |
| 2004-05-01 | 12035.958696 | 1785.483779 | 22286.433612 | 19302 |
| 2004-06-01 | 14649.272687 | 4406.927829 | 24891.617545 | 21892 |
# Forecast error = actual minus predicted mean
actual_United = prediction_df_United['Baggage']
errors_United = actual_United.sub(prediction_df_United['predicted_mean'])

# Magnitude of each error
errors_abs_United = errors_United.abs()

# MAPE: mean of |error| / |actual|
mape_United = errors_abs_United.div(actual_United.abs()).mean()

# RMSE: square root of the mean squared error
rmse_United = errors_United.pow(2).mean() ** 0.5

# Report both metrics
print(f'MAPE = {mape_United:.3%}')
print(f'RMSE = {rmse_United:,.3f}')
MAPE = 17.942% RMSE = 5,462.825
# Generate forecast for the next six months from the fitted United model
forecast_United = fit_AR1_United.get_forecast(steps=6)
mean_forecast_United = forecast_United.predicted_mean
# conf_int() column 0 is the lower bound, column 1 the upper bound
confidence_intervals_United = forecast_United.conf_int()
# Create a DataFrame with forecasted values and confidence intervals.
# The point-forecast column is named after the series being modelled (baggage
# complaints); the earlier 'Forecasted_CPI' label was a leftover from another analysis.
forecast_df_United = pd.DataFrame({
    'Forecasted_Baggage': mean_forecast_United,
    'Lower_Bound': confidence_intervals_United.iloc[:, 0],
    'Upper_Bound': confidence_intervals_United.iloc[:, 1],
})
forecast_df_United
| Forecasted_CPI | Lower_Bound | Upper_Bound | |
|---|---|---|---|
| 2011-01-01 | 11517.989987 | 3166.755574 | 19869.224401 |
| 2011-02-01 | 8070.936387 | -2757.702057 | 18899.574831 |
| 2011-03-01 | 11672.924016 | -560.511295 | 23906.359327 |
| 2011-04-01 | 9262.037189 | -3843.209135 | 22367.283512 |
| 2011-05-01 | 11463.779642 | -2204.058916 | 25131.618201 |
| 2011-06-01 | 17903.990424 | 3865.490787 | 31942.490060 |
# Draw the observed series followed by the six-month forecast
plt.figure(figsize=(14, 7))
bag_ts_United.plot(label='Observed', legend=True)
mean_forecast_United.plot(label='6-Months Forecast', legend=True)

# Shade the band between the lower and upper forecast bounds
band_lower_United, band_upper_United = (
    confidence_intervals_United.iloc[:, 0],
    confidence_intervals_United.iloc[:, 1],
)
plt.fill_between(
    confidence_intervals_United.index,
    band_lower_United,
    band_upper_United,
    color='pink',
    alpha=0.3,
)

# Labels, layout and legend
plt.ylabel("# Baggage Claims")
plt.xlabel("Date")
plt.title("United Airlines Baggage Claims Forecast")
plt.xticks(rotation=45)
plt.tight_layout()
plt.legend()
plt.show()
# Convert date column to a date class variable
# Create a monthly index for the series
# Work on an explicit copy so the column assignment below does not raise
# SettingWithCopyWarning (the frame was obtained by filtering another DataFrame).
bag_dat_American_Eagle = bag_dat_American_Eagle.copy()
bag_dat_American_Eagle['Date'] = pd.to_datetime(bag_dat_American_Eagle['Date'], format="%m/%Y")
# From here on the series holds the 'Cancelled' column (cancelled flights),
# even though the variable name still starts with "bag_ts".
bag_ts_American_Eagle = pd.Series(bag_dat_American_Eagle['Cancelled'].values, index=bag_dat_American_Eagle['Date'])
# 'MS' = month-start frequency
bag_ts_American_Eagle.index.freq = 'MS'
#Plot the series
plt.figure(figsize=(10, 6))
bag_ts_American_Eagle.plot()
plt.title("Cancelled Flights for American Eagle Airlines")
plt.xlabel("Date")
plt.ylabel("# Cancelled Flights")
plt.show()
<ipython-input-1259-8b5711e5a063>:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy bag_dat_American_Eagle['Date'] = pd.to_datetime(bag_dat_American_Eagle['Date'], format="%m/%Y")
# Execute the Ljung-Box test on the American Eagle cancelled-flights series using 10 lags
# (the variable keeps its "Bags_" prefix but holds the 'Cancelled' column)
Bags_American_Eagle = bag_dat_American_Eagle[['Cancelled']]
sm.stats.acorr_ljungbox(Bags_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 33.222572 | 0.00025 |
# Plot the ACF and PACF for the American Eagle cancelled-flights series (lag 0 omitted)
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(bag_ts_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(bag_ts_American_Eagle, ax=ax2_American_Eagle, zero=False)
plt.show()
# Fit ARIMA(0,0,0)x(0,1,0)[12] model to the data - This is simply taking the 12-month seasonal difference
# which is yt - y(t-12)
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(0,0,0), seasonal_order=(0, 1, 0, 12)).fit()
# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
# The PACF is restricted to the first 9 lags
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
# Candidate model: fit ARIMA(1,0,0)x(0,1,0)[12] to the data
# (overwrites the previous fit stored under the same variable name)
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,0), seasonal_order=(0, 1, 0, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
# Candidate model: fit ARIMA(1,0,0)x(0,1,1)[12] to the data
# (overwrites the previous fit stored under the same variable name)
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,0), seasonal_order=(0, 1, 1, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
# Candidate model: fit ARIMA(1,0,1)x(0,1,0)[12] to the data
# (overwrites the previous fit stored under the same variable name)
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 1, 0, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
# Candidate model: fit ARIMA(1,0,1)x(0,1,1)[12] to the data
# (overwrites the previous fit stored under the same variable name)
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 1, 1, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
# Candidate model: fit ARIMA(1,1,1)x(0,1,0)[12] to the data
# (overwrites the previous fit stored under the same variable name)
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,1,1), seasonal_order=(0, 1, 0, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
# Candidate model: fit ARIMA(1,1,1)x(0,1,1)[12] to the data
# (overwrites the previous fit stored under the same variable name)
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,1,1), seasonal_order=(0, 1, 1, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
# Candidate model: fit ARIMA(1,0,1)x(1,1,0)[12] to the data
# (overwrites the previous fit stored under the same variable name)
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(1, 1, 0, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
# Candidate model: fit ARIMA(1,0,1)x(1,1,1)[12] to the data
# (overwrites the previous fit stored under the same variable name)
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(1, 1, 1, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
# Candidate model: fit ARIMA(2,0,1)x(0,1,1)[12] to the data
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,1), seasonal_order=(0, 1, 1, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
# Candidate model: fit ARIMA(2,0,0)x(0,1,1)[12] to the data
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(0, 1, 1, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
# Candidate model: fit ARIMA(2,0,2)x(0,1,1)[12] to the data
# NOTE: the captured output for this cell shows a ConvergenceWarning from the optimizer
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,2), seasonal_order=(0, 1, 1, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
# Candidate model: fit ARIMA(2,0,0)x(0,1,2)[12] to the data
# NOTE: the captured output for this cell warns that there are too few observations
# to estimate starting parameters for the seasonal ARMA terms
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(0, 1, 2, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'
# Candidate model: fit ARIMA(2,0,2)x(0,1,2)[12] to the data
# NOTE: the captured output for this cell shows both a starting-parameter warning
# and a ConvergenceWarning
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,2), seasonal_order=(0, 1, 2, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
# Candidate model: fit ARIMA(2,0,0)x(2,1,2)[12] to the data
# NOTE: the captured output for this cell shows both a starting-parameter warning
# and a ConvergenceWarning
fit_ARD12_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(2, 1, 2, 12)).fit()
# Plot ACF and PACF for the residuals of this candidate in order to
# assess the correlation still remaining in the data
residuals12_American_Eagle = fit_ARD12_American_Eagle.resid
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_American_Eagle, ax=ax1_American_Eagle, zero=False)
plot_pacf(residuals12_American_Eagle, ax=ax2_American_Eagle, zero=False, lags=9)
plt.show()
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
# Fit ARIMA(1,1,1)x(0,1,1)[12] model to the data
# Candidate model for the American Eagle cancelled-flights series
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,1,1), seasonal_order=(0, 1, 1, 12)).fit()
# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())
# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Printing the results wrapper only shows its object repr, not the predicted values
print(pred_units_American_Eagle)
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 1, 1)x(0, 1, 1, 12) Log Likelihood -555.651
Date: Mon, 23 Oct 2023 AIC 1119.303
Time: 02:45:14 BIC 1128.353
Sample: 01-01-2004 HQIC 1122.902
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.0710 0.184 -0.386 0.699 -0.431 0.289
ma.L1 -0.7046 0.108 -6.506 0.000 -0.917 -0.492
ma.S.L12 -0.6649 0.136 -4.889 0.000 -0.931 -0.398
sigma2 3.413e+05 5.82e+04 5.863 0.000 2.27e+05 4.55e+05
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 6.53
Prob(Q): 0.92 Prob(JB): 0.04
Heteroskedasticity (H): 0.99 Skew: 0.42
Prob(H) (two-sided): 0.97 Kurtosis: 4.22
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a04fa530>
# Fit ARIMA(1,0,1)x(1,1,1)[12] model to the data
# Candidate model for the American Eagle cancelled-flights series
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(1, 1, 1, 12)).fit()
# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())
# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Printing the results wrapper only shows its object repr, not the predicted values
print(pred_units_American_Eagle)
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 1)x(1, 1, 1, 12) Log Likelihood -561.787
Date: Mon, 23 Oct 2023 AIC 1133.573
Time: 02:45:17 BIC 1144.957
Sample: 01-01-2004 HQIC 1138.105
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.9363 0.068 13.825 0.000 0.804 1.069
ma.L1 -0.7194 0.117 -6.155 0.000 -0.948 -0.490
ar.S.L12 0.0507 0.222 0.229 0.819 -0.384 0.486
ma.S.L12 -0.6921 0.197 -3.509 0.000 -1.079 -0.306
sigma2 3.277e+05 5.33e+04 6.145 0.000 2.23e+05 4.32e+05
===================================================================================
Ljung-Box (L1) (Q): 0.19 Jarque-Bera (JB): 7.37
Prob(Q): 0.66 Prob(JB): 0.03
Heteroskedasticity (H): 0.93 Skew: 0.40
Prob(H) (two-sided): 0.87 Kurtosis: 4.34
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a186dd20>
# Fit ARIMA(2,0,1)x(0,1,1)[12] model to the data
# Candidate model for the American Eagle cancelled-flights series
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,1), seasonal_order=(0, 1, 1, 12)).fit()
# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())
# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Printing the results wrapper only shows its object repr, not the predicted values
print(pred_units_American_Eagle)
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 1)x(0, 1, 1, 12) Log Likelihood -561.591
Date: Mon, 23 Oct 2023 AIC 1133.183
Time: 02:45:19 BIC 1144.566
Sample: 01-01-2004 HQIC 1137.715
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.8124 0.215 3.777 0.000 0.391 1.234
ar.L2 0.1112 0.176 0.630 0.529 -0.235 0.457
ma.L1 -0.6553 0.152 -4.316 0.000 -0.953 -0.358
ma.S.L12 -0.6794 0.127 -5.358 0.000 -0.928 -0.431
sigma2 3.246e+05 5.21e+04 6.225 0.000 2.22e+05 4.27e+05
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 8.07
Prob(Q): 0.95 Prob(JB): 0.02
Heteroskedasticity (H): 0.95 Skew: 0.41
Prob(H) (two-sided): 0.90 Kurtosis: 4.42
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3c62770>
# Fit ARIMA(2,0,2)x(0,1,1)[12] model to the data
# Candidate model for the American Eagle cancelled-flights series
# NOTE: the captured output for this cell shows a ConvergenceWarning and a
# near-singular covariance matrix warning
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,2), seasonal_order=(0, 1, 1, 12)).fit()
# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())
# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Printing the results wrapper only shows its object repr, not the predicted values
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 2)x(0, 1, [1], 12) Log Likelihood -559.601
Date: Mon, 23 Oct 2023 AIC 1131.202
Time: 02:45:21 BIC 1144.862
Sample: 01-01-2004 HQIC 1136.640
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.0395 0.051 -0.779 0.436 -0.139 0.060
ar.L2 0.9600 0.050 19.163 0.000 0.862 1.058
ma.L1 0.2030 0.136 1.489 0.137 -0.064 0.470
ma.L2 -0.6620 0.110 -6.021 0.000 -0.877 -0.446
ma.S.L12 -0.9766 0.159 -6.160 0.000 -1.287 -0.666
sigma2 2.577e+05 6.4e-07 4.03e+11 0.000 2.58e+05 2.58e+05
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 3.67
Prob(Q): 0.95 Prob(JB): 0.16
Heteroskedasticity (H): 0.86 Skew: 0.25
Prob(H) (two-sided): 0.72 Kurtosis: 3.98
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 2e+26. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1df8b20>
# Fit ARIMA(1,0,1)x(0,1,1)[12] model to the data
# Candidate model for the American Eagle cancelled-flights series
# NOTE(review): this is the last fit before the prediction data frame is built,
# so it appears to be the model carried forward -- confirm.
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 1, 1, 12)).fit()
# Summary of the ARIMA model
print(fit_AR1_American_Eagle.summary())
# Examine residuals from the model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
#Generate the predicted values for the series
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Printing the results wrapper only shows its object repr, not the predicted values
print(pred_units_American_Eagle)
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 1)x(0, 1, 1, 12) Log Likelihood -561.808
Date: Mon, 23 Oct 2023 AIC 1131.615
Time: 02:45:22 BIC 1140.722
Sample: 01-01-2004 HQIC 1135.241
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.9370 0.061 15.312 0.000 0.817 1.057
ma.L1 -0.7181 0.116 -6.166 0.000 -0.946 -0.490
ma.S.L12 -0.6421 0.123 -5.237 0.000 -0.882 -0.402
sigma2 3.306e+05 5.2e+04 6.352 0.000 2.29e+05 4.33e+05
===================================================================================
Ljung-Box (L1) (Q): 0.19 Jarque-Bera (JB): 7.47
Prob(Q): 0.66 Prob(JB): 0.02
Heteroskedasticity (H): 0.93 Skew: 0.39
Prob(H) (two-sided): 0.87 Kurtosis: 4.38
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a19b13c0>
# Collect the predicted mean and its confidence bounds in a single DataFrame
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
# Columns 0 and 1 of conf_int() are used as the lower / upper bound.
# The first row is sliced off (presumably because the first prediction has no
# history behind it — confirm).
prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1],
}).iloc[1:]
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | |
|---|---|---|---|
| Date | |||
| 2004-02-01 | 246.706788 | -2164.378269 | 2657.791844 |
| 2004-03-01 | 276.001192 | -2127.639065 | 2679.641448 |
| 2004-04-01 | 333.640809 | -2065.472371 | 2732.753989 |
| 2004-05-01 | 324.030000 | -2072.441677 | 2720.501676 |
| 2004-06-01 | 431.173159 | -1963.865843 | 2826.212161 |
# Put the observed series next to the predictions under the column name 'Baggage'.
# The observed series still carries the first date that was trimmed from the
# predictions, so the column-wise concat brings that row back; the trailing
# iloc[1:] removes it again.
# NOTE(review): the values displayed under 'Baggage' (886, 1346, 755, 2206, ...)
# match the Cancelled column of the raw data for American Eagle, not Baggage —
# confirm what bag_ts_American_Eagle actually holds.
prediction_df_American_Eagle = pd.concat(
    [prediction_df_American_Eagle, bag_ts_American_Eagle.to_frame(name='Baggage')],
    axis=1,
).iloc[1:]
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | Baggage | |
|---|---|---|---|---|
| Date | ||||
| 2004-02-01 | 246.706788 | -2164.378269 | 2657.791844 | 886 |
| 2004-03-01 | 276.001192 | -2127.639065 | 2679.641448 | 1346 |
| 2004-04-01 | 333.640809 | -2065.472371 | 2732.753989 | 755 |
| 2004-05-01 | 324.030000 | -2072.441677 | 2720.501676 | 2206 |
| 2004-06-01 | 431.173159 | -1963.865843 | 2826.212161 | 1580 |
# In-sample error: observed value minus predicted mean
errors_American_Eagle = prediction_df_American_Eagle['Baggage'].sub(
    prediction_df_American_Eagle['predicted_mean']
)
# Error magnitude, needed for the percentage error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# MAPE: mean of |error| / |observed|
# NOTE(review): every remaining row is scored, including the first seasonal
# cycle where the displayed intervals are very wide — confirm those start-up
# rows belong in the metric.
mape_American_Eagle = errors_abs_American_Eagle.div(
    prediction_df_American_Eagle['Baggage'].abs()
).mean()
# RMSE: square root of the mean squared error
rmse_American_Eagle = errors_American_Eagle.pow(2).mean() ** 0.5
# Report both metrics
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 43.820% RMSE = 672.910
# Residual autocorrelation diagnostics: ACF on the top panel, PACF (9 lags) on
# the bottom panel; lag 0 is left off both.
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals_American_Eagle, lags=9, zero=False, ax=ax2_American_Eagle)
plt.show()
# Ljung-Box test on the residuals at lag 10, returned as a DataFrame
# NOTE(review): the unsliced residual series is tested, start-up values
# included — confirm that is intended.
acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 35.045521 | 0.000123 |
# Candidate model for the American Eagle series: seasonal ARIMA(1,0,1)x(1,1,0)[12]
# (seasonal AR term in place of a seasonal MA term).
# The result is bound to fit_AR1_American_Eagle, the name every candidate fit
# in this notebook reuses (the "AR1" does not describe this model).
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 1),
    seasonal_order=(1, 1, 0, 12),
).fit()
# Coefficient table, information criteria and built-in diagnostics
print(fit_AR1_American_Eagle.summary())
# Pull the residuals and the in-sample predictions off the fitted model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Eyeball the residuals over time for leftover structure
plt.plot(residuals_American_Eagle)
plt.xticks(rotation=45)
plt.title('Residuals')
plt.show()
# print() of the prediction wrapper shows only its object repr, not the values
print(pred_units_American_Eagle)
SARIMAX Results
=========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 1)x(1, 1, [], 12) Log Likelihood -564.261
Date: Mon, 23 Oct 2023 AIC 1136.521
Time: 02:45:23 BIC 1145.628
Sample: 01-01-2004 HQIC 1140.147
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.8843 0.110 8.036 0.000 0.669 1.100
ma.L1 -0.6247 0.177 -3.533 0.000 -0.971 -0.278
ar.S.L12 -0.4359 0.153 -2.846 0.004 -0.736 -0.136
sigma2 3.664e+05 4.84e+04 7.566 0.000 2.71e+05 4.61e+05
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 8.14
Prob(Q): 0.95 Prob(JB): 0.02
Heteroskedasticity (H): 1.31 Skew: -0.07
Prob(H) (two-sided): 0.51 Kurtosis: 4.64
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a17b4910>
# Collect the predicted mean and its confidence bounds in a single DataFrame
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
# Columns 0 and 1 of conf_int() are used as the lower / upper bound.
# The first row is sliced off (presumably because the first prediction has no
# history behind it — confirm).
prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1],
}).iloc[1:]
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | |
|---|---|---|---|
| Date | |||
| 2004-02-01 | 307.726972 | -2107.907098 | 2723.361042 |
| 2004-03-01 | 326.234049 | -2079.324186 | 2731.792283 |
| 2004-04-01 | 382.482066 | -2017.769151 | 2782.733284 |
| 2004-05-01 | 356.566959 | -2041.033200 | 2754.167118 |
| 2004-06-01 | 480.027285 | -1916.384609 | 2876.439179 |
# Put the observed series next to the predictions under the column name 'Baggage'.
# The observed series still carries the first date that was trimmed from the
# predictions, so the column-wise concat brings that row back; the trailing
# iloc[1:] removes it again.
# NOTE(review): the values displayed under 'Baggage' (886, 1346, 755, 2206, ...)
# match the Cancelled column of the raw data for American Eagle, not Baggage —
# confirm what bag_ts_American_Eagle actually holds.
prediction_df_American_Eagle = pd.concat(
    [prediction_df_American_Eagle, bag_ts_American_Eagle.to_frame(name='Baggage')],
    axis=1,
).iloc[1:]
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | Baggage | |
|---|---|---|---|---|
| Date | ||||
| 2004-02-01 | 307.726972 | -2107.907098 | 2723.361042 | 886 |
| 2004-03-01 | 326.234049 | -2079.324186 | 2731.792283 | 1346 |
| 2004-04-01 | 382.482066 | -2017.769151 | 2782.733284 | 755 |
| 2004-05-01 | 356.566959 | -2041.033200 | 2754.167118 | 2206 |
| 2004-06-01 | 480.027285 | -1916.384609 | 2876.439179 | 1580 |
# In-sample error: observed value minus predicted mean
errors_American_Eagle = prediction_df_American_Eagle['Baggage'].sub(
    prediction_df_American_Eagle['predicted_mean']
)
# Error magnitude, needed for the percentage error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# MAPE: mean of |error| / |observed|
# NOTE(review): every remaining row is scored, including the first seasonal
# cycle where the displayed intervals are very wide — confirm those start-up
# rows belong in the metric.
mape_American_Eagle = errors_abs_American_Eagle.div(
    prediction_df_American_Eagle['Baggage'].abs()
).mean()
# RMSE: square root of the mean squared error
rmse_American_Eagle = errors_American_Eagle.pow(2).mean() ** 0.5
# Report both metrics
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 45.101% RMSE = 683.157
# Residual autocorrelation diagnostics: ACF on the top panel, PACF (9 lags) on
# the bottom panel; lag 0 is left off both.
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals_American_Eagle, lags=9, zero=False, ax=ax2_American_Eagle)
plt.show()
# Ljung-Box test on the residuals at lag 10, returned as a DataFrame
# NOTE(review): the unsliced residual series is tested, start-up values
# included — confirm that is intended.
acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 31.207754 | 0.000542 |
# Candidate model for the American Eagle series: seasonal ARIMA(2,0,0)x(0,1,1)[12].
# The result is bound to fit_AR1_American_Eagle, the name every candidate fit
# in this notebook reuses (the "AR1" does not describe this model).
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(2, 0, 0),
    seasonal_order=(0, 1, 1, 12),
).fit()
# Coefficient table, information criteria and built-in diagnostics
print(fit_AR1_American_Eagle.summary())
# Pull the residuals and the in-sample predictions off the fitted model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Eyeball the residuals over time for leftover structure
plt.plot(residuals_American_Eagle)
plt.xticks(rotation=45)
plt.title('Residuals')
plt.show()
# print() of the prediction wrapper shows only its object repr, not the values
print(pred_units_American_Eagle)
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0)x(0, 1, [1], 12) Log Likelihood -563.592
Date: Mon, 23 Oct 2023 AIC 1135.183
Time: 02:45:25 BIC 1144.290
Sample: 01-01-2004 HQIC 1138.809
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.2581 0.125 2.056 0.040 0.012 0.504
ar.L2 0.3334 0.116 2.863 0.004 0.105 0.562
ma.S.L12 -0.6334 0.117 -5.428 0.000 -0.862 -0.405
sigma2 3.445e+05 4.51e+04 7.645 0.000 2.56e+05 4.33e+05
===================================================================================
Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 12.76
Prob(Q): 0.83 Prob(JB): 0.00
Heteroskedasticity (H): 1.20 Skew: 0.57
Prob(H) (two-sided): 0.66 Kurtosis: 4.72
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1968880>
# Collect the predicted mean and its confidence bounds in a single DataFrame
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
# Columns 0 and 1 of conf_int() are used as the lower / upper bound.
# The first row is sliced off (presumably because the first prediction has no
# history behind it — confirm).
prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1],
}).iloc[1:]
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | |
|---|---|---|---|
| Date | |||
| 2004-02-01 | 363.258763 | -2112.590077 | 2839.107603 |
| 2004-03-01 | 471.404179 | -1978.164028 | 2920.972386 |
| 2004-04-01 | 399.504162 | -2047.351014 | 2846.359339 |
| 2004-05-01 | 388.398615 | -2057.173215 | 2833.970445 |
| 2004-06-01 | 462.663200 | -1982.834174 | 2908.160574 |
# Put the observed series next to the predictions under the column name 'Baggage'.
# The observed series still carries the first date that was trimmed from the
# predictions, so the column-wise concat brings that row back; the trailing
# iloc[1:] removes it again.
# NOTE(review): the values displayed under 'Baggage' (886, 1346, 755, 2206, ...)
# match the Cancelled column of the raw data for American Eagle, not Baggage —
# confirm what bag_ts_American_Eagle actually holds.
prediction_df_American_Eagle = pd.concat(
    [prediction_df_American_Eagle, bag_ts_American_Eagle.to_frame(name='Baggage')],
    axis=1,
).iloc[1:]
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | Baggage | |
|---|---|---|---|---|
| Date | ||||
| 2004-02-01 | 363.258763 | -2112.590077 | 2839.107603 | 886 |
| 2004-03-01 | 471.404179 | -1978.164028 | 2920.972386 | 1346 |
| 2004-04-01 | 399.504162 | -2047.351014 | 2846.359339 | 755 |
| 2004-05-01 | 388.398615 | -2057.173215 | 2833.970445 | 2206 |
| 2004-06-01 | 462.663200 | -1982.834174 | 2908.160574 | 1580 |
# In-sample error: observed value minus predicted mean
errors_American_Eagle = prediction_df_American_Eagle['Baggage'].sub(
    prediction_df_American_Eagle['predicted_mean']
)
# Error magnitude, needed for the percentage error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# MAPE: mean of |error| / |observed|
# NOTE(review): every remaining row is scored, including the first seasonal
# cycle where the displayed intervals are very wide — confirm those start-up
# rows belong in the metric.
mape_American_Eagle = errors_abs_American_Eagle.div(
    prediction_df_American_Eagle['Baggage'].abs()
).mean()
# RMSE: square root of the mean squared error
rmse_American_Eagle = errors_American_Eagle.pow(2).mean() ** 0.5
# Report both metrics
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 45.927% RMSE = 680.648
# Residual autocorrelation diagnostics: ACF on the top panel, PACF (9 lags) on
# the bottom panel; lag 0 is left off both.
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals_American_Eagle, lags=9, zero=False, ax=ax2_American_Eagle)
plt.show()
# Ljung-Box test on the residuals at lag 10, returned as a DataFrame
# NOTE(review): the unsliced residual series is tested, start-up values
# included — confirm that is intended.
acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 36.958414 | 0.000058 |
# Candidate model for the American Eagle series: ARIMA(1,0,1)x(0,1,0)[12]
# (seasonal differencing only, no seasonal AR/MA terms).
# The result is bound to fit_AR1_American_Eagle, the name every candidate fit
# in this notebook reuses (the "AR1" does not describe this model).
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(1, 0, 1),
    seasonal_order=(0, 1, 0, 12),
).fit()
# Coefficient table, information criteria and built-in diagnostics
print(fit_AR1_American_Eagle.summary())
# Pull the residuals and the in-sample predictions off the fitted model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Eyeball the residuals over time for leftover structure
plt.plot(residuals_American_Eagle)
plt.xticks(rotation=45)
plt.title('Residuals')
plt.show()
# print() of the prediction wrapper shows only its object repr, not the values
print(pred_units_American_Eagle)
SARIMAX Results
=========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 1)x(0, 1, [], 12) Log Likelihood -571.372
Date: Mon, 23 Oct 2023 AIC 1148.744
Time: 02:45:26 BIC 1155.574
Sample: 01-01-2004 HQIC 1151.463
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.7900 0.168 4.708 0.000 0.461 1.119
ma.L1 -0.5219 0.247 -2.111 0.035 -1.007 -0.037
sigma2 4.568e+05 6.18e+04 7.388 0.000 3.36e+05 5.78e+05
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 3.31
Prob(Q): 1.00 Prob(JB): 0.19
Heteroskedasticity (H): 1.42 Skew: -0.07
Prob(H) (two-sided): 0.39 Kurtosis: 4.04
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1219570>
# Collect the predicted mean and its confidence bounds in a single DataFrame
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
# Columns 0 and 1 of conf_int() are used as the lower / upper bound.
# The first row is sliced off (presumably because the first prediction has no
# history behind it — confirm).
prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1],
}).iloc[1:]
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | |
|---|---|---|---|
| Date | |||
| 2004-02-01 | 307.528647 | -2109.155084 | 2724.212377 |
| 2004-03-01 | 308.630126 | -2099.544345 | 2716.804596 |
| 2004-04-01 | 356.645595 | -2047.597941 | 2760.889130 |
| 2004-05-01 | 324.181183 | -2078.230115 | 2726.592481 |
| 2004-06-01 | 454.625386 | -1946.928338 | 2856.179110 |
# Put the observed series next to the predictions under the column name 'Baggage'.
# The observed series still carries the first date that was trimmed from the
# predictions, so the column-wise concat brings that row back; the trailing
# iloc[1:] removes it again.
# NOTE(review): the values displayed under 'Baggage' (886, 1346, 755, 2206, ...)
# match the Cancelled column of the raw data for American Eagle, not Baggage —
# confirm what bag_ts_American_Eagle actually holds.
prediction_df_American_Eagle = pd.concat(
    [prediction_df_American_Eagle, bag_ts_American_Eagle.to_frame(name='Baggage')],
    axis=1,
).iloc[1:]
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | Baggage | |
|---|---|---|---|---|
| Date | ||||
| 2004-02-01 | 307.528647 | -2109.155084 | 2724.212377 | 886 |
| 2004-03-01 | 308.630126 | -2099.544345 | 2716.804596 | 1346 |
| 2004-04-01 | 356.645595 | -2047.597941 | 2760.889130 | 755 |
| 2004-05-01 | 324.181183 | -2078.230115 | 2726.592481 | 2206 |
| 2004-06-01 | 454.625386 | -1946.928338 | 2856.179110 | 1580 |
# In-sample error: observed value minus predicted mean
errors_American_Eagle = prediction_df_American_Eagle['Baggage'].sub(
    prediction_df_American_Eagle['predicted_mean']
)
# Error magnitude, needed for the percentage error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# MAPE: mean of |error| / |observed|
# NOTE(review): every remaining row is scored, including the first seasonal
# cycle where the displayed intervals are very wide — confirm those start-up
# rows belong in the metric.
mape_American_Eagle = errors_abs_American_Eagle.div(
    prediction_df_American_Eagle['Baggage'].abs()
).mean()
# RMSE: square root of the mean squared error
rmse_American_Eagle = errors_American_Eagle.pow(2).mean() ** 0.5
# Report both metrics
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 49.141% RMSE = 737.088
# Residual autocorrelation diagnostics: ACF on the top panel, PACF (9 lags) on
# the bottom panel; lag 0 is left off both.
fig, (ax1_American_Eagle, ax2_American_Eagle) = plt.subplots(nrows=2, ncols=1, figsize=(12, 12))
plot_acf(residuals_American_Eagle, zero=False, ax=ax1_American_Eagle)
plot_pacf(residuals_American_Eagle, lags=9, zero=False, ax=ax2_American_Eagle)
plt.show()
# Ljung-Box test on the residuals at lag 10, returned as a DataFrame
# NOTE(review): the unsliced residual series is tested, start-up values
# included — confirm that is intended.
acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 22.779727 | 0.01159 |
# Candidate model for the American Eagle series: seasonal ARIMA(2,0,0)x(0,1,2)[12].
# The result is bound to fit_AR1_American_Eagle, the name every candidate fit
# in this notebook reuses (the "AR1" does not describe this model).
# NOTE: the recorded run warned that there were too few observations to
# estimate starting parameters for the seasonal ARMA part.
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(2, 0, 0),
    seasonal_order=(0, 1, 2, 12),
).fit()
# Coefficient table, information criteria and built-in diagnostics
print(fit_AR1_American_Eagle.summary())
# Pull the residuals and the in-sample predictions off the fitted model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Eyeball the residuals over time for leftover structure
plt.plot(residuals_American_Eagle)
plt.xticks(rotation=45)
plt.title('Residuals')
plt.show()
# print() of the prediction wrapper shows only its object repr, not the values
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'
SARIMAX Results
=============================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0)x(0, 1, [1, 2], 12) Log Likelihood -563.257
Date: Mon, 23 Oct 2023 AIC 1136.515
Time: 02:45:29 BIC 1147.898
Sample: 01-01-2004 HQIC 1141.047
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.2246 0.124 1.811 0.070 -0.018 0.468
ar.L2 0.3388 0.120 2.822 0.005 0.103 0.574
ma.S.L12 -0.6673 0.205 -3.256 0.001 -1.069 -0.266
ma.S.L24 -0.1305 0.167 -0.782 0.434 -0.458 0.197
sigma2 3.223e+05 6.19e+04 5.204 0.000 2.01e+05 4.44e+05
===================================================================================
Ljung-Box (L1) (Q): 0.02 Jarque-Bera (JB): 12.55
Prob(Q): 0.88 Prob(JB): 0.00
Heteroskedasticity (H): 1.18 Skew: 0.63
Prob(H) (two-sided): 0.68 Kurtosis: 4.61
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1d61d80>
# Candidate model for the American Eagle series: seasonal ARIMA(2,0,2)x(0,1,2)[12].
# The result is bound to fit_AR1_American_Eagle, the name every candidate fit
# in this notebook reuses (the "AR1" does not describe this model).
# NOTE: the recorded run raised a ConvergenceWarning and reported a
# near-singular covariance matrix, so its standard errors may be unstable.
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(2, 0, 2),
    seasonal_order=(0, 1, 2, 12),
).fit()
# Coefficient table, information criteria and built-in diagnostics
print(fit_AR1_American_Eagle.summary())
# Pull the residuals and the in-sample predictions off the fitted model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Eyeball the residuals over time for leftover structure
plt.plot(residuals_American_Eagle)
plt.xticks(rotation=45)
plt.title('Residuals')
plt.show()
# print() of the prediction wrapper shows only its object repr, not the values
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 2)x(0, 1, 2, 12) Log Likelihood -559.518
Date: Mon, 23 Oct 2023 AIC 1133.035
Time: 02:45:33 BIC 1148.972
Sample: 01-01-2004 HQIC 1139.380
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.0446 0.063 -0.707 0.480 -0.168 0.079
ar.L2 0.9552 0.062 15.419 0.000 0.834 1.077
ma.L1 0.2078 0.143 1.455 0.146 -0.072 0.488
ma.L2 -0.6578 0.115 -5.723 0.000 -0.883 -0.433
ma.S.L12 -0.9370 0.192 -4.871 0.000 -1.314 -0.560
ma.S.L24 -0.0518 0.151 -0.342 0.732 -0.349 0.245
sigma2 2.581e+05 7.33e-07 3.52e+11 0.000 2.58e+05 2.58e+05
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 3.74
Prob(Q): 0.93 Prob(JB): 0.15
Heteroskedasticity (H): 0.88 Skew: 0.25
Prob(H) (two-sided): 0.75 Kurtosis: 4.00
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 8.55e+26. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a164a800>
# Candidate model for the American Eagle series: seasonal ARIMA(2,0,0)x(2,1,2)[12].
# The result is bound to fit_AR1_American_Eagle, the name every candidate fit
# in this notebook reuses (the "AR1" does not describe this model).
# NOTE: the recorded run raised a ConvergenceWarning and reported a
# near-singular covariance matrix, so its standard errors may be unstable.
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(2, 0, 0),
    seasonal_order=(2, 1, 2, 12),
).fit()
# Coefficient table, information criteria and built-in diagnostics
print(fit_AR1_American_Eagle.summary())
# Pull the residuals and the in-sample predictions off the fitted model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Eyeball the residuals over time for leftover structure
plt.plot(residuals_American_Eagle)
plt.xticks(rotation=45)
plt.title('Residuals')
plt.show()
# print() of the prediction wrapper shows only its object repr, not the values
print(pred_units_American_Eagle)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
SARIMAX Results
=============================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0)x(2, 1, [1, 2], 12) Log Likelihood -563.121
Date: Mon, 23 Oct 2023 AIC 1140.243
Time: 02:45:36 BIC 1156.179
Sample: 01-01-2004 HQIC 1146.587
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.2206 0.127 1.740 0.082 -0.028 0.469
ar.L2 0.3291 0.120 2.732 0.006 0.093 0.565
ar.S.L12 1.2299 2.256 0.545 0.586 -3.191 5.651
ar.S.L24 -0.2305 0.993 -0.232 0.816 -2.176 1.715
ma.S.L12 -1.9795 2.125 -0.932 0.351 -6.143 2.185
ma.S.L24 0.9796 2.492 0.393 0.694 -3.904 5.864
sigma2 2.905e+05 2.26e-05 1.29e+10 0.000 2.91e+05 2.91e+05
===================================================================================
Ljung-Box (L1) (Q): 0.02 Jarque-Bera (JB): 11.82
Prob(Q): 0.90 Prob(JB): 0.00
Heteroskedasticity (H): 1.20 Skew: 0.62
Prob(H) (two-sided): 0.65 Kurtosis: 4.56
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 7.85e+26. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a186ddb0>
# Candidate model for the American Eagle series: ARIMA(2,0,0)x(0,1,0)[12]
# (seasonal differencing only, no seasonal AR/MA terms).
# The result is bound to fit_AR1_American_Eagle, the name every candidate fit
# in this notebook reuses (the "AR1" does not describe this model).
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(2, 0, 0),
    seasonal_order=(0, 1, 0, 12),
).fit()
# Coefficient table, information criteria and built-in diagnostics
print(fit_AR1_American_Eagle.summary())
# Pull the residuals and the in-sample predictions off the fitted model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Eyeball the residuals over time for leftover structure
plt.plot(residuals_American_Eagle)
plt.xticks(rotation=45)
plt.title('Residuals')
plt.show()
# print() of the prediction wrapper shows only its object repr, not the values
print(pred_units_American_Eagle)
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0)x(0, 1, 0, 12) Log Likelihood -571.852
Date: Mon, 23 Oct 2023 AIC 1149.705
Time: 02:45:37 BIC 1156.535
Sample: 01-01-2004 HQIC 1152.424
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.3001 0.125 2.393 0.017 0.054 0.546
ar.L2 0.1788 0.116 1.544 0.123 -0.048 0.406
sigma2 4.746e+05 6.88e+04 6.895 0.000 3.4e+05 6.1e+05
===================================================================================
Ljung-Box (L1) (Q): 0.03 Jarque-Bera (JB): 2.41
Prob(Q): 0.86 Prob(JB): 0.30
Heteroskedasticity (H): 1.55 Skew: 0.02
Prob(H) (two-sided): 0.29 Kurtosis: 3.90
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a12e63b0>
# Baseline for the American Eagle series: ARIMA(0,0,0) with no seasonal terms
# and no differencing; the recorded summary lists only a constant and the
# noise variance.
# The result is bound to fit_AR1_American_Eagle, the name every candidate fit
# in this notebook reuses (the "AR1" does not describe this model).
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(0, 0, 0),
    seasonal_order=(0, 0, 0, 12),
).fit()
# Coefficient table, information criteria and built-in diagnostics
print(fit_AR1_American_Eagle.summary())
# Pull the residuals and the in-sample predictions off the fitted model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Eyeball the residuals over time for leftover structure
plt.plot(residuals_American_Eagle)
plt.xticks(rotation=45)
plt.title('Residuals')
plt.show()
# print() of the prediction wrapper shows only its object repr, not the values
print(pred_units_American_Eagle)
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA Log Likelihood -671.044
Date: Mon, 23 Oct 2023 AIC 1346.087
Time: 02:45:37 BIC 1350.949
Sample: 01-01-2004 HQIC 1348.042
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 1414.7620 89.626 15.785 0.000 1239.099 1590.425
sigma2 5.088e+05 8.3e+04 6.133 0.000 3.46e+05 6.71e+05
===================================================================================
Ljung-Box (L1) (Q): 10.40 Jarque-Bera (JB): 8.67
Prob(Q): 0.00 Prob(JB): 0.01
Heteroskedasticity (H): 2.08 Skew: 0.76
Prob(H) (two-sided): 0.06 Kurtosis: 3.38
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3dc4970>
# Collect the predicted mean and its confidence bounds in a single DataFrame
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
# Columns 0 and 1 of conf_int() are used as the lower / upper bound.
# The first row is sliced off, matching how the other candidate models in
# this notebook are prepared.
prediction_df_American_Eagle = pd.DataFrame({
    'predicted_mean': predicted_mean_American_Eagle,
    'lower_bound': conf_int_American_Eagle.iloc[:, 0],
    'upper_bound': conf_int_American_Eagle.iloc[:, 1],
}).iloc[1:]
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | |
|---|---|---|---|
| Date | |||
| 2004-02-01 | 1414.761951 | 16.710898 | 2812.813005 |
| 2004-03-01 | 1414.761951 | 16.710898 | 2812.813005 |
| 2004-04-01 | 1414.761951 | 16.710898 | 2812.813005 |
| 2004-05-01 | 1414.761951 | 16.710898 | 2812.813005 |
| 2004-06-01 | 1414.761951 | 16.710898 | 2812.813005 |
# Put the observed series next to the predictions under the column name 'Baggage'.
# The observed series still carries the first date that was trimmed from the
# predictions, so the column-wise concat brings that row back; the trailing
# iloc[1:] removes it again.
# NOTE(review): the values displayed under 'Baggage' (886, 1346, 755, 2206, ...)
# match the Cancelled column of the raw data for American Eagle, not Baggage —
# confirm what bag_ts_American_Eagle actually holds.
prediction_df_American_Eagle = pd.concat(
    [prediction_df_American_Eagle, bag_ts_American_Eagle.to_frame(name='Baggage')],
    axis=1,
).iloc[1:]
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | Baggage | |
|---|---|---|---|---|
| Date | ||||
| 2004-02-01 | 1414.761951 | 16.710898 | 2812.813005 | 886 |
| 2004-03-01 | 1414.761951 | 16.710898 | 2812.813005 | 1346 |
| 2004-04-01 | 1414.761951 | 16.710898 | 2812.813005 | 755 |
| 2004-05-01 | 1414.761951 | 16.710898 | 2812.813005 | 2206 |
| 2004-06-01 | 1414.761951 | 16.710898 | 2812.813005 | 1580 |
# In-sample error: observed value minus predicted mean
errors_American_Eagle = prediction_df_American_Eagle['Baggage'].sub(
    prediction_df_American_Eagle['predicted_mean']
)
# Error magnitude, needed for the percentage error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# MAPE: mean of |error| / |observed|
mape_American_Eagle = errors_abs_American_Eagle.div(
    prediction_df_American_Eagle['Baggage'].abs()
).mean()
# RMSE: square root of the mean squared error
rmse_American_Eagle = errors_American_Eagle.pow(2).mean() ** 0.5
# Report both metrics
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 60.603% RMSE = 707.812
# Ljung-Box test on the current residual series at lag 10, returned as a DataFrame
acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 33.222572 | 0.00025 |
# Candidate model for the American Eagle series: ARIMA(0,0,0)x(0,1,1)[12]
# (seasonal differencing plus one seasonal MA term, no non-seasonal terms).
# The result is bound to fit_AR1_American_Eagle, the name every candidate fit
# in this notebook reuses (the "AR1" does not describe this model).
fit_AR1_American_Eagle = ARIMA(
    bag_ts_American_Eagle,
    order=(0, 0, 0),
    seasonal_order=(0, 1, 1, 12),
).fit()
# Coefficient table, information criteria and built-in diagnostics
print(fit_AR1_American_Eagle.summary())
# Pull the residuals and the in-sample predictions off the fitted model
residuals_American_Eagle = fit_AR1_American_Eagle.resid
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# Eyeball the residuals over time for leftover structure
plt.plot(residuals_American_Eagle)
plt.xticks(rotation=45)
plt.title('Residuals')
plt.show()
# print() of the prediction wrapper shows only its object repr, not the values
print(pred_units_American_Eagle)
SARIMAX Results
================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(0, 1, [1], 12) Log Likelihood -572.764
Date: Mon, 23 Oct 2023 AIC 1149.529
Time: 02:45:38 BIC 1154.082
Sample: 01-01-2004 HQIC 1151.341
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ma.S.L12 -0.5074 0.089 -5.690 0.000 -0.682 -0.333
sigma2 4.51e+05 7.15e+04 6.307 0.000 3.11e+05 5.91e+05
===================================================================================
Ljung-Box (L1) (Q): 11.31 Jarque-Bera (JB): 1.20
Prob(Q): 0.00 Prob(JB): 0.55
Heteroskedasticity (H): 1.61 Skew: 0.16
Prob(H) (two-sided): 0.25 Kurtosis: 3.55
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1d6a8f0>
# Ljung-Box test (10 lags) on residuals_American_Eagle, returned as a DataFrame.
# A small p-value indicates autocorrelation remains in the residuals.
# NOTE(review): presumably these are the residuals of the seasonal-only
# ARIMA(0,0,0)x(0,1,1)[12] fit from the preceding cell — depends on run order.
sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 156.734109 | 1.528942e-28 |
# Fit a non-seasonal ARIMA(1,0,0) (AR(1)) model to the American Eagle series.
# The seasonal order is (0,0,0) at period 12, i.e. no seasonal terms.
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,0), seasonal_order=(0, 0, 0, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_American_Eagle.summary())
# Plot the model residuals to look for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
print(pred_units_American_Eagle)
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 0) Log Likelihood -665.563
Date: Mon, 23 Oct 2023 AIC 1337.126
Time: 02:45:38 BIC 1344.419
Sample: 01-01-2004 HQIC 1340.058
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 1414.7619 133.929 10.564 0.000 1152.266 1677.258
ar.L1 0.3539 0.102 3.481 0.001 0.155 0.553
sigma2 4.39e+05 6.56e+04 6.694 0.000 3.1e+05 5.67e+05
===================================================================================
Ljung-Box (L1) (Q): 0.15 Jarque-Bera (JB): 24.14
Prob(Q): 0.70 Prob(JB): 0.00
Heteroskedasticity (H): 1.49 Skew: 1.08
Prob(H) (two-sided): 0.29 Kurtosis: 4.50
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1e5e140>
# Ljung-Box test (10 lags) on residuals_American_Eagle, returned as a DataFrame.
# NOTE(review): presumably the residuals of the ARIMA(1,0,0) fit from the
# preceding cell — the variable is rebound by each fit, so confirm run order.
sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 17.732891 | 0.05964 |
# Collect the predictions into a DataFrame: the predicted mean plus the first
# and second columns of conf_int(), used as lower and upper bounds.
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()
prediction_df_American_Eagle = pd.DataFrame({
'predicted_mean': predicted_mean_American_Eagle,
'lower_bound': conf_int_American_Eagle.iloc[:, 0],
'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
# tail(-1) keeps every row except the first, i.e. drops the first prediction
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
# Display the first rows as the cell output
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | |
|---|---|---|---|
| Date | |||
| 2004-02-01 | 1792.100807 | 493.539834 | 3090.661780 |
| 2004-03-01 | 1227.634474 | -70.926499 | 2526.195447 |
| 2004-04-01 | 1390.427272 | 91.866299 | 2688.988246 |
| 2004-05-01 | 1181.273917 | -117.287057 | 2479.834890 |
| 2004-06-01 | 1694.779025 | 396.218052 | 2993.339999 |
# Join the observed series onto the predictions column-wise (aligned on the
# index). The observed values are placed in a column named 'Baggage'.
# NOTE(review): the displayed values match the Cancelled column of the raw data
# and the forecast plot is titled "Cancelled Flights", so 'Baggage' looks like a
# misleading label — confirm which series bag_ts_American_Eagle holds.
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
# Drop the first row again; presumably the join re-adds the first date (removed
# from the predictions earlier) with missing prediction values.
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
# Display the first rows as the cell output
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | Baggage | |
|---|---|---|---|---|
| Date | ||||
| 2004-02-01 | 1792.100807 | 493.539834 | 3090.661780 | 886 |
| 2004-03-01 | 1227.634474 | -70.926499 | 2526.195447 | 1346 |
| 2004-04-01 | 1390.427272 | 91.866299 | 2688.988246 | 755 |
| 2004-05-01 | 1181.273917 | -117.287057 | 2479.834890 | 2206 |
| 2004-06-01 | 1694.779025 | 396.218052 | 2993.339999 | 1580 |
# Prediction error: observed value (stored in the 'Baggage' column) minus the
# predicted mean
errors_American_Eagle = prediction_df_American_Eagle['Baggage'] - prediction_df_American_Eagle['predicted_mean']
# absolute value of the error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# -- MAPE: mean of |error| / |observed| --
# NOTE(review): any observed value of 0 would make this ratio infinite; assumes
# the series has no zero months — TODO confirm.
mape_American_Eagle = (errors_abs_American_Eagle/prediction_df_American_Eagle['Baggage'].abs()).mean()
# -- RMSE: square root of the mean squared error --
rmse_American_Eagle = (errors_American_Eagle**2).mean()**0.5
# -- print both metrics (MAPE as a percentage, RMSE with 3 decimals) --
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 51.240% RMSE = 662.554
# Fit a non-seasonal ARIMA(2,0,0) (AR(2)) model to the American Eagle series.
# The seasonal order is (0,0,0) at period 12, i.e. no seasonal terms.
# NOTE(review): stored in fit_AR1_American_Eagle despite being an AR(2) fit; the
# name is reused for every candidate model.
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(2,0,0), seasonal_order=(0, 0, 0, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_American_Eagle.summary())
# Plot the model residuals to look for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
print(pred_units_American_Eagle)
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0) Log Likelihood -665.131
Date: Mon, 23 Oct 2023 AIC 1338.262
Time: 02:45:39 BIC 1347.985
Sample: 01-01-2004 HQIC 1342.170
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 1416.6280 148.803 9.520 0.000 1124.979 1708.277
ar.L1 0.3129 0.136 2.296 0.022 0.046 0.580
ar.L2 0.1047 0.136 0.770 0.441 -0.162 0.371
sigma2 4.414e+05 6.66e+04 6.629 0.000 3.11e+05 5.72e+05
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 26.32
Prob(Q): 0.93 Prob(JB): 0.00
Heteroskedasticity (H): 1.53 Skew: 1.09
Prob(H) (two-sided): 0.27 Kurtosis: 4.66
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1c7f910>
# Fit a non-seasonal ARIMA(1,0,1) (ARMA(1,1)) model to the American Eagle
# series. The seasonal order is (0,0,0) at period 12, i.e. no seasonal terms.
# NOTE(review): stored in fit_AR1_American_Eagle although the model also has an
# MA term; the name is reused for every candidate model.
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,1), seasonal_order=(0, 0, 0, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_American_Eagle.summary())
# Plot the model residuals to look for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
print(pred_units_American_Eagle)
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 1) Log Likelihood -665.305
Date: Mon, 23 Oct 2023 AIC 1338.610
Time: 02:45:39 BIC 1348.334
Sample: 01-01-2004 HQIC 1342.519
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 1414.7625 144.605 9.784 0.000 1131.342 1698.183
ar.L1 0.5163 0.321 1.610 0.107 -0.112 1.145
ma.L1 -0.1870 0.419 -0.446 0.656 -1.009 0.635
sigma2 4.384e+05 6.57e+04 6.675 0.000 3.1e+05 5.67e+05
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 25.95
Prob(Q): 0.91 Prob(JB): 0.00
Heteroskedasticity (H): 1.50 Skew: 1.10
Prob(H) (two-sided): 0.29 Kurtosis: 4.62
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1e5d270>
# Fit ARIMA(1,0,0)x(0,1,0)[12] to the American Eagle series: an AR(1) term on
# the 12-month seasonally differenced data, with no seasonal AR/MA terms.
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,0,0), seasonal_order=(0, 1, 0, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_American_Eagle.summary())
# Plot the model residuals to look for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
# NOTE(review): with a seasonal difference the earliest residuals may reflect
# model initialization rather than fit — check before relying on residual tests.
print(pred_units_American_Eagle)
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 0)x(0, 1, 0, 12) Log Likelihood -573.055
Date: Mon, 23 Oct 2023 AIC 1150.111
Time: 02:45:40 BIC 1154.664
Sample: 01-01-2004 HQIC 1151.923
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.3692 0.100 3.686 0.000 0.173 0.565
sigma2 4.832e+05 6.6e+04 7.326 0.000 3.54e+05 6.13e+05
===================================================================================
Ljung-Box (L1) (Q): 0.48 Jarque-Bera (JB): 3.46
Prob(Q): 0.49 Prob(JB): 0.18
Heteroskedasticity (H): 1.35 Skew: -0.17
Prob(H) (two-sided): 0.46 Kurtosis: 4.02
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1dc4bb0>
# Ljung-Box test (10 lags) on residuals_American_Eagle, returned as a DataFrame.
# NOTE(review): presumably the residuals of the ARIMA(1,0,0)x(0,1,0)[12] fit
# from the preceding cell — the variable is rebound by each fit.
sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 44.687572 | 0.000002 |
# Fit a non-seasonal ARIMA(1,1,0) model to the American Eagle series: an AR(1)
# term on the first-differenced data. The seasonal order is (0,0,0) at period
# 12, i.e. no seasonal terms.
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,1,0), seasonal_order=(0, 0, 0, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_American_Eagle.summary())
# Plot the model residuals to look for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
print(pred_units_American_Eagle)
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 1, 0) Log Likelihood -666.295
Date: Mon, 23 Oct 2023 AIC 1336.589
Time: 02:45:40 BIC 1341.427
Sample: 01-01-2004 HQIC 1338.533
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 -0.4132 0.126 -3.275 0.001 -0.660 -0.166
sigma2 5.398e+05 5.47e+04 9.874 0.000 4.33e+05 6.47e+05
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 42.43
Prob(Q): 0.92 Prob(JB): 0.00
Heteroskedasticity (H): 1.56 Skew: 1.09
Prob(H) (two-sided): 0.24 Kurtosis: 5.74
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a5ebdf00>
# Ljung-Box test (10 lags) on residuals_American_Eagle, returned as a DataFrame.
# NOTE(review): presumably the residuals of the ARIMA(1,1,0) fit from the
# preceding cell — the variable is rebound by each fit.
sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 30.259919 | 0.000777 |
# Fit a non-seasonal ARIMA(0,0,1) (MA(1)) model to the American Eagle series.
# The seasonal order is (0,0,0) at period 12, i.e. no seasonal terms.
# NOTE(review): stored in fit_AR1_American_Eagle although the model has no AR
# term; the name is reused for every candidate model.
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(0,0,1), seasonal_order=(0, 0, 0, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_American_Eagle.summary())
# Plot the model residuals to look for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
print(pred_units_American_Eagle)
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(0, 0, 1) Log Likelihood -667.052
Date: Mon, 23 Oct 2023 AIC 1340.104
Time: 02:45:41 BIC 1347.396
Sample: 01-01-2004 HQIC 1343.035
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 1414.7619 111.943 12.638 0.000 1195.358 1634.165
ma.L1 0.2620 0.122 2.153 0.031 0.024 0.501
sigma2 4.566e+05 7.03e+04 6.498 0.000 3.19e+05 5.94e+05
===================================================================================
Ljung-Box (L1) (Q): 0.31 Jarque-Bera (JB): 17.33
Prob(Q): 0.58 Prob(JB): 0.00
Heteroskedasticity (H): 1.61 Skew: 0.98
Prob(H) (two-sided): 0.21 Kurtosis: 4.04
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3be7190>
# Ljung-Box test (10 lags) on residuals_American_Eagle, returned as a DataFrame.
# NOTE(review): presumably the residuals of the ARIMA(0,0,1) fit from the
# preceding cell — the variable is rebound by each fit.
sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 20.834496 | 0.022278 |
# Fit a non-seasonal ARIMA(0,1,1) model to the American Eagle series: an MA(1)
# term on the first-differenced data. The seasonal order is (0,0,0) at period
# 12, i.e. no seasonal terms.
# NOTE(review): stored in fit_AR1_American_Eagle although the model has no AR
# term; the name is reused for every candidate model.
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(0,1,1), seasonal_order=(0, 0, 0, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_American_Eagle.summary())
# Plot the model residuals to look for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
print(pred_units_American_Eagle)
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(0, 1, 1) Log Likelihood -659.069
Date: Mon, 23 Oct 2023 AIC 1322.138
Time: 02:45:41 BIC 1326.976
Sample: 01-01-2004 HQIC 1324.082
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ma.L1 -0.8355 0.055 -15.128 0.000 -0.944 -0.727
sigma2 4.616e+05 7.26e+04 6.358 0.000 3.19e+05 6.04e+05
===================================================================================
Ljung-Box (L1) (Q): 2.27 Jarque-Bera (JB): 11.66
Prob(Q): 0.13 Prob(JB): 0.00
Heteroskedasticity (H): 1.70 Skew: 0.83
Prob(H) (two-sided): 0.16 Kurtosis: 3.77
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3b64e20>
# Ljung-Box test (10 lags) on residuals_American_Eagle, returned as a DataFrame.
# NOTE(review): presumably the residuals of the ARIMA(0,1,1) fit from the
# preceding cell — the variable is rebound by each fit.
sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 19.83774 | 0.030825 |
# Fit a non-seasonal ARIMA(1,1,1) model to the American Eagle series: AR(1) and
# MA(1) terms on the first-differenced data. The seasonal order is (0,0,0) at
# period 12, i.e. no seasonal terms.
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(1,1,1), seasonal_order=(0, 0, 0, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_American_Eagle.summary())
# Plot the model residuals to look for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
print(pred_units_American_Eagle)
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 1, 1) Log Likelihood -657.734
Date: Mon, 23 Oct 2023 AIC 1321.467
Time: 02:45:42 BIC 1328.724
Sample: 01-01-2004 HQIC 1324.383
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.2141 0.143 1.498 0.134 -0.066 0.494
ma.L1 -0.8965 0.064 -14.005 0.000 -1.022 -0.771
sigma2 4.459e+05 6.74e+04 6.621 0.000 3.14e+05 5.78e+05
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 19.18
Prob(Q): 0.95 Prob(JB): 0.00
Heteroskedasticity (H): 1.60 Skew: 1.00
Prob(H) (two-sided): 0.22 Kurtosis: 4.25
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a19b3700>
# Fit a non-seasonal ARIMA(0,0,2) (MA(2)) model to the American Eagle series.
# The seasonal order is (0,0,0) at period 12, i.e. no seasonal terms.
# This is the last American Eagle fit before the accuracy and forecast cells,
# so those cells use this model when the notebook is run top to bottom.
# NOTE(review): stored in fit_AR1_American_Eagle although the model has no AR
# term; the name is reused for every candidate model.
fit_AR1_American_Eagle = ARIMA(bag_ts_American_Eagle, order=(0,0,2), seasonal_order=(0, 0, 0, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_American_Eagle.summary())
# Plot the model residuals to look for leftover structure
residuals_American_Eagle = fit_AR1_American_Eagle.resid
plt.plot(residuals_American_Eagle)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_American_Eagle = fit_AR1_American_Eagle.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
print(pred_units_American_Eagle)
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(0, 0, 2) Log Likelihood -664.233
Date: Mon, 23 Oct 2023 AIC 1336.466
Time: 02:45:42 BIC 1346.189
Sample: 01-01-2004 HQIC 1340.374
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 1414.7620 141.485 9.999 0.000 1137.457 1692.067
ma.L1 0.3213 0.144 2.232 0.026 0.039 0.603
ma.L2 0.2756 0.116 2.380 0.017 0.049 0.503
sigma2 4.489e+05 6.67e+04 6.732 0.000 3.18e+05 5.8e+05
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 23.64
Prob(Q): 0.96 Prob(JB): 0.00
Heteroskedasticity (H): 1.73 Skew: 1.02
Prob(H) (two-sided): 0.15 Kurtosis: 4.60
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1a45690>
# Ljung-Box test (10 lags) on residuals_American_Eagle, returned as a DataFrame.
# NOTE(review): presumably the residuals of the ARIMA(0,0,2) fit from the
# preceding cell — the variable is rebound by each fit.
sm.stats.acorr_ljungbox(residuals_American_Eagle, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 13.370587 | 0.203683 |
# Rebuild the prediction DataFrame from the current pred_units_American_Eagle:
# the predicted mean plus the first and second columns of conf_int(), used as
# lower and upper bounds.
predicted_mean_American_Eagle = pred_units_American_Eagle.predicted_mean
conf_int_American_Eagle = pred_units_American_Eagle.conf_int()
prediction_df_American_Eagle = pd.DataFrame({
'predicted_mean': predicted_mean_American_Eagle,
'lower_bound': conf_int_American_Eagle.iloc[:, 0],
'upper_bound': conf_int_American_Eagle.iloc[:, 1]
})
# tail(-1) keeps every row except the first, i.e. drops the first prediction
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
# Display the first rows as the cell output
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | |
|---|---|---|---|
| Date | |||
| 2004-02-01 | 1785.383897 | 448.340700 | 3122.427094 |
| 2004-03-01 | 1391.475226 | 65.500190 | 2717.450262 |
| 2004-04-01 | 1161.118923 | -152.985961 | 2475.223807 |
| 2004-05-01 | 1272.025490 | -42.035474 | 2586.086454 |
| 2004-06-01 | 1602.731357 | 289.435354 | 2916.027361 |
# Join the observed series onto the predictions column-wise (aligned on the
# index). The observed values are placed in a column named 'Baggage'.
# NOTE(review): the displayed values match the Cancelled column of the raw data
# and the forecast plot is titled "Cancelled Flights", so 'Baggage' looks like a
# misleading label — confirm which series bag_ts_American_Eagle holds.
prediction_df_American_Eagle = pd.concat([prediction_df_American_Eagle,bag_ts_American_Eagle.to_frame(name='Baggage')],axis=1)
# Drop the first row again; presumably the join re-adds the first date (removed
# from the predictions earlier) with missing prediction values.
prediction_df_American_Eagle = prediction_df_American_Eagle.tail(-1)
# Display the first rows as the cell output
prediction_df_American_Eagle.head()
| predicted_mean | lower_bound | upper_bound | Baggage | |
|---|---|---|---|---|
| Date | ||||
| 2004-02-01 | 1785.383897 | 448.340700 | 3122.427094 | 886 |
| 2004-03-01 | 1391.475226 | 65.500190 | 2717.450262 | 1346 |
| 2004-04-01 | 1161.118923 | -152.985961 | 2475.223807 | 755 |
| 2004-05-01 | 1272.025490 | -42.035474 | 2586.086454 | 2206 |
| 2004-06-01 | 1602.731357 | 289.435354 | 2916.027361 | 1580 |
# Prediction error: observed value (stored in the 'Baggage' column) minus the
# predicted mean
errors_American_Eagle = prediction_df_American_Eagle['Baggage'] - prediction_df_American_Eagle['predicted_mean']
# absolute value of the error
errors_abs_American_Eagle = errors_American_Eagle.abs()
# -- MAPE: mean of |error| / |observed| --
# NOTE(review): any observed value of 0 would make this ratio infinite; assumes
# the series has no zero months — TODO confirm.
mape_American_Eagle = (errors_abs_American_Eagle/prediction_df_American_Eagle['Baggage'].abs()).mean()
# -- RMSE: square root of the mean squared error --
rmse_American_Eagle = (errors_American_Eagle**2).mean()**0.5
# -- print both metrics (MAPE as a percentage, RMSE with 3 decimals) --
print(f'MAPE = {mape_American_Eagle:.3%}')
print(f'RMSE = {rmse_American_Eagle:,.3f}')
MAPE = 50.871% RMSE = 651.822
# Forecast the next six periods (months, given the monthly index) from the
# model currently bound to fit_AR1_American_Eagle.
forecast_American_Eagle = fit_AR1_American_Eagle.get_forecast(steps=6)
mean_forecast_American_Eagle = forecast_American_Eagle.predicted_mean
# Interval bounds at conf_int()'s default confidence level
confidence_intervals_American_Eagle = forecast_American_Eagle.conf_int()
# Create a DataFrame with forecasted values and confidence intervals
# NOTE(review): the column label 'Forecasted_CPI' looks like a leftover from a
# different (CPI) analysis; this series is plotted as cancelled flights. The
# label is kept here because other cells may reference it — consider renaming.
forecast_df_American_Eagle = pd.DataFrame({
'Forecasted_CPI': mean_forecast_American_Eagle,
'Lower_Bound': confidence_intervals_American_Eagle.iloc[:, 0],
'Upper_Bound': confidence_intervals_American_Eagle.iloc[:, 1]
})
# Display the forecast table as the cell output
forecast_df_American_Eagle
| Forecasted_CPI | Lower_Bound | Upper_Bound | |
|---|---|---|---|
| 2011-01-01 | 1514.979671 | 201.835725 | 2828.123617 |
| 2011-02-01 | 1688.808716 | 309.536083 | 3068.081349 |
| 2011-03-01 | 1414.761958 | -11.198800 | 2840.722716 |
| 2011-04-01 | 1414.761958 | -11.198800 | 2840.722716 |
| 2011-05-01 | 1414.761958 | -11.198800 | 2840.722716 |
| 2011-06-01 | 1414.761958 | -11.198800 | 2840.722716 |
# Plot the observed American Eagle series together with the six-step forecast
plt.figure(figsize=(14, 7))
bag_ts_American_Eagle.plot(label='Observed', legend=True)
mean_forecast_American_Eagle.plot(label='6-Months Forecast', legend=True)
# Shade the band between the lower (column 0) and upper (column 1) interval
# bounds over the forecast dates
plt.fill_between(confidence_intervals_American_Eagle.index,
confidence_intervals_American_Eagle.iloc[:, 0],
confidence_intervals_American_Eagle.iloc[:, 1], color='pink', alpha=0.3)
plt.title("American Eagle Cancelled Flights Forecast")
plt.xlabel("Date")
plt.ylabel("# Cancelled Flights")
# Rotate the date tick labels so they do not overlap
plt.xticks(rotation=45)
plt.tight_layout()
plt.legend()
plt.show()
# Build the monthly cancelled-flights series for United.
# bag_dat_United is a slice of the full DataFrame, so take an explicit copy
# before assigning into it; writing to the slice directly raises pandas'
# SettingWithCopyWarning and may not modify the object that is used afterwards.
bag_dat_United = bag_dat_United.copy()
# Convert the 'Date' column from "MM/YYYY" strings to datetime values
bag_dat_United['Date'] = pd.to_datetime(bag_dat_United['Date'], format="%m/%Y")
# Create a series of the 'Cancelled' counts indexed by date
bag_ts_United = pd.Series(bag_dat_United['Cancelled'].values, index=bag_dat_United['Date'])
# Declare the index frequency as month-start so the time-series models can
# extend the index when forecasting
bag_ts_United.index.freq = 'MS'
#Plot the series
plt.figure(figsize=(10, 6))
bag_ts_United.plot()
plt.title("Cancelled Flights for United Airlines")
plt.xlabel("Date")
plt.ylabel("# Cancelled FLights")
plt.show()
<ipython-input-1339-d65356821bcf>:4: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy bag_dat_United['Date'] = pd.to_datetime(bag_dat_United['Date'], format="%m/%Y")
# Ljung-Box test on the raw United 'Cancelled' column using 10 lags, to check
# whether the series is autocorrelated before any model is fitted.
# Bags_United is a one-column DataFrame holding only 'Cancelled'.
Bags_United = bag_dat_United[['Cancelled']]
sm.stats.acorr_ljungbox(Bags_United, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 37.135304 | 0.000054 |
# Plot the ACF (top) and PACF (bottom) of the United cancelled-flights series
# on two stacked axes; zero=False omits the lag-0 bar.
fig, (ax1_United, ax2_United) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(bag_ts_United, ax=ax1_United, zero=False)
plot_pacf(bag_ts_United, ax=ax2_United, zero=False)
plt.show()
# Fit ARIMA(0,0,0)x(0,1,0)[12] to the United series. With no AR or MA terms this
# is simply the 12-month seasonal difference, y(t) - y(t-12).
fit_ARD12_United = ARIMA(bag_ts_United, order=(0,0,0), seasonal_order=(0, 1, 0, 12)).fit()
# Plot ACF and PACF for residuals from the seasonal difference in order to
# assess the correlation still remaining in the data
residuals12_United = fit_ARD12_United.resid
fig, (ax1_United, ax2_United) = plt.subplots(2, 1, figsize=(12, 12))
plot_acf(residuals12_United, ax=ax1_United, zero=False)
# The PACF plot is limited to 9 lags here, while the ACF uses the default
plot_pacf(residuals12_United, ax=ax2_United, zero=False, lags=9)
plt.show()
# Fit ARIMA(1,0,0)x(0,1,1)[12] to the United cancelled-flights series: an AR(1)
# term plus one seasonal difference and one seasonal MA term at period 12.
# NOTE(review): this cell was previously labelled the "best model for Baggage
# Complaints"; bag_ts_United is built from the 'Cancelled' column, and other
# candidate fits for this series follow — confirm which model is final.
fit_AR1_United = ARIMA(bag_ts_United, order=(1,0,0), seasonal_order=(0, 1, 1, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_United.summary())
# Plot the model residuals to look for leftover structure
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_United = fit_AR1_United.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
# NOTE(review): with a seasonal difference the earliest residuals may reflect
# model initialization rather than fit — check before relying on residual tests.
print(pred_units_United)
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 0)x(0, 1, [1], 12) Log Likelihood -535.276
Date: Mon, 23 Oct 2023 AIC 1076.553
Time: 02:45:47 BIC 1083.383
Sample: 01-01-2004 HQIC 1079.272
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.3382 0.109 3.103 0.002 0.125 0.552
ma.S.L12 -0.5301 0.095 -5.555 0.000 -0.717 -0.343
sigma2 1.589e+05 1.78e+04 8.920 0.000 1.24e+05 1.94e+05
===================================================================================
Ljung-Box (L1) (Q): 1.29 Jarque-Bera (JB): 34.35
Prob(Q): 0.26 Prob(JB): 0.00
Heteroskedasticity (H): 0.78 Skew: 1.07
Prob(H) (two-sided): 0.54 Kurtosis: 5.62
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a161c6d0>
# Ljung-Box test (10 lags) on residuals_United, returned as a DataFrame.
# NOTE(review): presumably the residuals of the ARIMA(1,0,0)x(0,1,1)[12] fit
# from the preceding cell — residuals_United is rebound by each fit.
sm.stats.acorr_ljungbox(residuals_United, lags=[10], return_df=True)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 46.873687 | 9.955560e-07 |
# Fit a non-seasonal ARIMA(2,0,0) (AR(2)) model to the United cancelled-flights
# series. The seasonal order is (0,0,0) at period 12, i.e. no seasonal terms.
# NOTE(review): stored in fit_AR1_United despite being an AR(2) fit; the name
# is reused for every candidate model.
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(0, 0, 0, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_United.summary())
# Plot the model residuals to look for leftover structure
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_United = fit_AR1_United.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
print(pred_units_United)
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0) Log Likelihood -620.147
Date: Mon, 23 Oct 2023 AIC 1248.295
Time: 02:45:47 BIC 1258.018
Sample: 01-01-2004 HQIC 1252.203
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 678.6309 109.281 6.210 0.000 464.445 892.817
ar.L1 0.2078 0.115 1.804 0.071 -0.018 0.434
ar.L2 0.2657 0.099 2.675 0.007 0.071 0.460
sigma2 1.51e+05 1.74e+04 8.678 0.000 1.17e+05 1.85e+05
===================================================================================
Ljung-Box (L1) (Q): 0.15 Jarque-Bera (JB): 122.47
Prob(Q): 0.70 Prob(JB): 0.00
Heteroskedasticity (H): 1.38 Skew: 1.75
Prob(H) (two-sided): 0.40 Kurtosis: 7.77
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3dc7880>
# Fit ARIMA(2,0,0)x(0,1,0)[12] to the United cancelled-flights series: AR(2)
# terms on the 12-month seasonally differenced data, no seasonal AR/MA terms.
# NOTE(review): stored in fit_AR1_United despite being an AR(2) fit; the name
# is reused for every candidate model.
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(0, 1, 0, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_United.summary())
# Plot the model residuals to look for leftover structure
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_United = fit_AR1_United.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
print(pred_units_United)
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0)x(0, 1, 0, 12) Log Likelihood -532.970
Date: Mon, 23 Oct 2023 AIC 1071.939
Time: 02:45:47 BIC 1078.769
Sample: 01-01-2004 HQIC 1074.658
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.1488 0.113 1.311 0.190 -0.074 0.371
ar.L2 0.3652 0.105 3.484 0.000 0.160 0.571
sigma2 1.608e+05 2.24e+04 7.177 0.000 1.17e+05 2.05e+05
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 11.93
Prob(Q): 0.97 Prob(JB): 0.00
Heteroskedasticity (H): 0.85 Skew: 0.70
Prob(H) (two-sided): 0.68 Kurtosis: 4.42
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a210c5e0>
# Fit ARIMA(2,0,1)x(0,1,1)[12] to the United cancelled-flights series: AR(2)
# and MA(1) terms plus one seasonal difference and one seasonal MA term at
# period 12.
# NOTE(review): stored in fit_AR1_United although this is not an AR(1) model;
# the name is reused for every candidate model.
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,1), seasonal_order=(0, 1, 1, 12)).fit()
# Print the fitted model's summary table
print(fit_AR1_United.summary())
# Plot the model residuals to look for leftover structure
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Generate predictions from the fitted model (no start/end given)
pred_units_United = fit_AR1_United.get_prediction()
# NOTE: printing the results wrapper only shows its object repr; the values are
# reached through .predicted_mean and .conf_int().
print(pred_units_United)
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 1)x(0, 1, 1, 12) Log Likelihood -526.405
Date: Mon, 23 Oct 2023 AIC 1062.811
Time: 02:45:49 BIC 1074.194
Sample: 01-01-2004 HQIC 1067.343
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.7365 0.148 4.979 0.000 0.447 1.026
ar.L2 0.2152 0.111 1.937 0.053 -0.003 0.433
ma.L1 -0.6527 0.159 -4.107 0.000 -0.964 -0.341
ma.S.L12 -0.7401 0.168 -4.403 0.000 -1.070 -0.411
sigma2 1.178e+05 1.66e+04 7.079 0.000 8.52e+04 1.5e+05
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 25.25
Prob(Q): 0.92 Prob(JB): 0.00
Heteroskedasticity (H): 0.65 Skew: 1.05
Prob(H) (two-sided): 0.29 Kurtosis: 5.01
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a17cc550>
# Candidate model: seasonal ARIMA(2,0,2)x(0,1,1)[12] fitted to the United
# series (bag_ts_United).
# The result is stored in fit_AR1_United regardless of the AR order; the same
# name is reassigned by the other candidate fits in this notebook.
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,2), seasonal_order=(0, 1, 1, 12)).fit()
# Print the coefficient table and fit statistics (log likelihood, AIC, BIC, ...)
print(fit_AR1_United.summary())
# Plot the model residuals over time
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Build the prediction results for the series (no start/end supplied)
pred_units_United = fit_AR1_United.get_prediction()
# Prints only the results object's repr, not the predicted values
print(pred_units_United)
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 2)x(0, 1, [1], 12) Log Likelihood -525.136
Date: Mon, 23 Oct 2023 AIC 1062.271
Time: 02:45:51 BIC 1075.931
Sample: 01-01-2004 HQIC 1067.709
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.0805 0.192 0.419 0.675 -0.296 0.457
ar.L2 0.8590 0.195 4.416 0.000 0.478 1.240
ma.L1 0.0375 0.235 0.160 0.873 -0.423 0.498
ma.L2 -0.5380 0.204 -2.632 0.008 -0.939 -0.137
ma.S.L12 -0.7715 0.211 -3.665 0.000 -1.184 -0.359
sigma2 1.124e+05 1.78e+04 6.309 0.000 7.75e+04 1.47e+05
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 26.39
Prob(Q): 0.97 Prob(JB): 0.00
Heteroskedasticity (H): 0.67 Skew: 1.10
Prob(H) (two-sided): 0.33 Kurtosis: 5.00
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1790880>
# Candidate model: seasonal ARIMA(2,0,0)x(0,1,0)[12] fitted to the United
# series (bag_ts_United): AR(2) on the seasonally differenced data, with no
# seasonal AR or MA terms.
# The result is stored in fit_AR1_United regardless of the AR order; the same
# name is reassigned by the other candidate fits in this notebook.
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(0, 1, 0, 12)).fit()
# Print the coefficient table and fit statistics (log likelihood, AIC, BIC, ...)
print(fit_AR1_United.summary())
# Plot the model residuals over time
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Build the prediction results for the series (no start/end supplied)
pred_units_United = fit_AR1_United.get_prediction()
# Prints only the results object's repr, not the predicted values
print(pred_units_United)
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0)x(0, 1, 0, 12) Log Likelihood -532.970
Date: Mon, 23 Oct 2023 AIC 1071.939
Time: 02:45:51 BIC 1078.769
Sample: 01-01-2004 HQIC 1074.658
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.1488 0.113 1.311 0.190 -0.074 0.371
ar.L2 0.3652 0.105 3.484 0.000 0.160 0.571
sigma2 1.608e+05 2.24e+04 7.177 0.000 1.17e+05 2.05e+05
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 11.93
Prob(Q): 0.97 Prob(JB): 0.00
Heteroskedasticity (H): 0.85 Skew: 0.70
Prob(H) (two-sided): 0.68 Kurtosis: 4.42
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a12909a0>
# Candidate model: seasonal ARIMA(2,0,0)x(1,1,0)[12] fitted to the United
# series (bag_ts_United).
# The result is stored in fit_AR1_United regardless of the AR order; the same
# name is reassigned by the other candidate fits in this notebook.
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(1, 1, 0, 12)).fit()
# Print the coefficient table and fit statistics (log likelihood, AIC, BIC, ...)
print(fit_AR1_United.summary())
# Plot the model residuals over time
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Build the prediction results for the series (no start/end supplied)
pred_units_United = fit_AR1_United.get_prediction()
# Prints only the results object's repr, not the predicted values
print(pred_units_United)
SARIMAX Results
========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0)x(1, 1, 0, 12) Log Likelihood -530.939
Date: Mon, 23 Oct 2023 AIC 1069.878
Time: 02:45:52 BIC 1078.984
Sample: 01-01-2004 HQIC 1073.503
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.1751 0.107 1.635 0.102 -0.035 0.385
ar.L2 0.3933 0.108 3.640 0.000 0.182 0.605
ar.S.L12 -0.2478 0.146 -1.701 0.089 -0.533 0.038
sigma2 1.473e+05 2.15e+04 6.865 0.000 1.05e+05 1.89e+05
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 11.03
Prob(Q): 0.92 Prob(JB): 0.00
Heteroskedasticity (H): 0.81 Skew: 0.69
Prob(H) (two-sided): 0.60 Kurtosis: 4.33
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1d604f0>
# Candidate model: seasonal ARIMA(2,0,0)x(1,1,1)[12] fitted to the United
# series (bag_ts_United).
# The result is stored in fit_AR1_United regardless of the AR order; the same
# name is reassigned by the other candidate fits in this notebook.
# NOTE(review): the recorded run of this cell reported a singular or
# near-singular covariance matrix, so its standard errors may be unreliable.
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(1, 1, 1, 12)).fit()
# Print the coefficient table and fit statistics (log likelihood, AIC, BIC, ...)
print(fit_AR1_United.summary())
# Plot the model residuals over time
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Build the prediction results for the series (no start/end supplied)
pred_units_United = fit_AR1_United.get_prediction()
# Prints only the results object's repr, not the predicted values
print(pred_units_United)
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0)x(1, 1, [1], 12) Log Likelihood -526.380
Date: Mon, 23 Oct 2023 AIC 1062.759
Time: 02:45:53 BIC 1074.142
Sample: 01-01-2004 HQIC 1067.291
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.1778 0.099 1.796 0.072 -0.016 0.372
ar.L2 0.4029 0.091 4.405 0.000 0.224 0.582
ar.S.L12 0.3386 0.134 2.526 0.012 0.076 0.601
ma.S.L12 -0.9990 0.135 -7.376 0.000 -1.264 -0.734
sigma2 1.047e+05 1.3e-06 8.05e+10 0.000 1.05e+05 1.05e+05
===================================================================================
Ljung-Box (L1) (Q): 0.01 Jarque-Bera (JB): 18.94
Prob(Q): 0.91 Prob(JB): 0.00
Heteroskedasticity (H): 0.75 Skew: 1.02
Prob(H) (two-sided): 0.48 Kurtosis: 4.47
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 6.89e+25. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1dc64d0>
# Baseline model: plain AR(1) fitted to the United series (bag_ts_United).
# All seasonal orders are zero and there is no differencing; the recorded
# summary reports the model as ARIMA(1, 0, 0) with a constant term.
# The same fit_AR1_United name is reassigned by the other candidate fits in
# this notebook.
fit_AR1_United = ARIMA(bag_ts_United, order=(1,0,0), seasonal_order=(0, 0, 0, 12)).fit()
# Print the coefficient table and fit statistics (log likelihood, AIC, BIC, ...)
print(fit_AR1_United.summary())
# Plot the model residuals over time
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Build the prediction results for the series (no start/end supplied)
pred_units_United = fit_AR1_United.get_prediction()
# Prints only the results object's repr, not the predicted values
print(pred_units_United)
SARIMAX Results
==============================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 0) Log Likelihood -623.153
Date: Mon, 23 Oct 2023 AIC 1252.306
Time: 02:45:54 BIC 1259.598
Sample: 01-01-2004 HQIC 1255.237
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 678.6314 90.161 7.527 0.000 501.920 855.343
ar.L1 0.2873 0.122 2.359 0.018 0.049 0.526
sigma2 1.631e+05 2.06e+04 7.904 0.000 1.23e+05 2.04e+05
===================================================================================
Ljung-Box (L1) (Q): 0.48 Jarque-Bera (JB): 135.65
Prob(Q): 0.49 Prob(JB): 0.00
Heteroskedasticity (H): 1.58 Skew: 1.90
Prob(H) (two-sided): 0.23 Kurtosis: 7.93
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3d36230>
# Ljung-Box test for autocorrelation remaining in the model residuals at lag 10.
# The residuals come from a fitted ARMA-type model, so the chi-square reference
# distribution loses one degree of freedom per estimated AR/MA coefficient
# (seasonal terms included). Count those coefficients from the fitted model's
# parameter names ('ar.*' / 'ma.*') and pass the count as model_df.
n_arma_params_United = sum(
    name.startswith(('ar.', 'ma.')) for name in fit_AR1_United.param_names
)
sm.stats.acorr_ljungbox(
    residuals_United, lags=[10], model_df=n_arma_params_United, return_df=True
)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 22.87172 | 0.01123 |
# Candidate model: seasonal ARIMA(1,0,2)x(0,1,1)[12] fitted to the United
# series (bag_ts_United).
# The same fit_AR1_United name is reassigned by the other candidate fits in
# this notebook.
fit_AR1_United = ARIMA(bag_ts_United, order=(1,0,2), seasonal_order=(0, 1, 1, 12)).fit()
# Print the coefficient table and fit statistics (log likelihood, AIC, BIC, ...)
print(fit_AR1_United.summary())
# Plot the model residuals over time
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Build the prediction results for the series (no start/end supplied)
pred_units_United = fit_AR1_United.get_prediction()
# Prints only the results object's repr, not the predicted values
print(pred_units_United)
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(1, 0, 2)x(0, 1, [1], 12) Log Likelihood -526.740
Date: Mon, 23 Oct 2023 AIC 1063.480
Time: 02:45:55 BIC 1074.863
Sample: 01-01-2004 HQIC 1068.012
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.9765 0.071 13.849 0.000 0.838 1.115
ma.L1 -0.8724 0.134 -6.530 0.000 -1.134 -0.611
ma.L2 0.1370 0.099 1.390 0.165 -0.056 0.330
ma.S.L12 -0.9949 7.869 -0.126 0.899 -16.417 14.428
sigma2 9.958e+04 7.72e+05 0.129 0.897 -1.41e+06 1.61e+06
===================================================================================
Ljung-Box (L1) (Q): 0.02 Jarque-Bera (JB): 31.94
Prob(Q): 0.87 Prob(JB): 0.00
Heteroskedasticity (H): 0.66 Skew: 1.16
Prob(H) (two-sided): 0.32 Kurtosis: 5.29
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a1790520>
# Candidate model: seasonal ARIMA(2,0,0)x(0,1,2)[12] fitted to the United
# series (bag_ts_United).
# The result is stored in fit_AR1_United regardless of the AR order; the same
# name is reassigned by the other candidate fits in this notebook.
# NOTE(review): the recorded run of this cell emitted a ConvergenceWarning
# (maximum likelihood optimization failed to converge) and a near-singular
# covariance warning, so treat its estimates and AIC/BIC with caution.
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(0, 1, 2, 12)).fit()
# Print the coefficient table and fit statistics (log likelihood, AIC, BIC, ...)
print(fit_AR1_United.summary())
# Plot the model residuals over time
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Build the prediction results for the series (no start/end supplied)
pred_units_United = fit_AR1_United.get_prediction()
# Prints only the results object's repr, not the predicted values
print(pred_units_United)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
SARIMAX Results
=============================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0)x(0, 1, [1, 2], 12) Log Likelihood -525.780
Date: Mon, 23 Oct 2023 AIC 1061.559
Time: 02:45:58 BIC 1072.942
Sample: 01-01-2004 HQIC 1066.091
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.1717 0.106 1.617 0.106 -0.036 0.380
ar.L2 0.3986 0.095 4.178 0.000 0.212 0.586
ma.S.L12 -0.6314 0.182 -3.466 0.001 -0.988 -0.274
ma.S.L24 -0.3676 0.148 -2.492 0.013 -0.657 -0.078
sigma2 9.985e+04 1.66e-06 6.02e+10 0.000 9.99e+04 9.99e+04
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 17.92
Prob(Q): 0.96 Prob(JB): 0.00
Heteroskedasticity (H): 0.79 Skew: 0.99
Prob(H) (two-sided): 0.56 Kurtosis: 4.44
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 1.06e+26. Standard errors may be unstable.
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a0420160>
# Candidate model: seasonal ARIMA(2,0,1)x(0,1,2)[12] fitted to the United
# series (bag_ts_United).
# The result is stored in fit_AR1_United regardless of the AR order; the same
# name is reassigned by the other candidate fits in this notebook.
# NOTE(review): the recorded run of this cell emitted a ConvergenceWarning
# (maximum likelihood optimization failed to converge), so treat its estimates
# and AIC/BIC with caution.
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,1), seasonal_order=(0, 1, 2, 12)).fit()
# Print the coefficient table and fit statistics (log likelihood, AIC, BIC, ...)
print(fit_AR1_United.summary())
# Plot the model residuals over time
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Build the prediction results for the series (no start/end supplied)
pred_units_United = fit_AR1_United.get_prediction()
# Prints only the results object's repr, not the predicted values
print(pred_units_United)
/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/statespace/sarimax.py:866: UserWarning: Too few observations to estimate starting parameters for seasonal ARMA. All parameters except for variances will be set to zeros.
warn('Too few observations to estimate starting parameters%s.'
/usr/local/lib/python3.10/dist-packages/statsmodels/base/model.py:607: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
warnings.warn("Maximum Likelihood optimization failed to "
SARIMAX Results
=============================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 1)x(0, 1, [1, 2], 12) Log Likelihood -524.684
Date: Mon, 23 Oct 2023 AIC 1061.368
Time: 02:46:03 BIC 1075.028
Sample: 01-01-2004 HQIC 1066.806
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.7123 0.172 4.139 0.000 0.375 1.050
ar.L2 0.2166 0.126 1.717 0.086 -0.031 0.464
ma.L1 -0.6267 0.180 -3.483 0.000 -0.979 -0.274
ma.S.L12 -0.6987 8.378 -0.083 0.934 -17.119 15.722
ma.S.L24 -0.2947 2.546 -0.116 0.908 -5.284 4.695
sigma2 9.798e+04 8.13e+05 0.121 0.904 -1.5e+06 1.69e+06
===================================================================================
Ljung-Box (L1) (Q): 0.03 Jarque-Bera (JB): 24.39
Prob(Q): 0.87 Prob(JB): 0.00
Heteroskedasticity (H): 0.72 Skew: 1.08
Prob(H) (two-sided): 0.43 Kurtosis: 4.86
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a3d500a0>
# Selected model: seasonal ARIMA(2,0,0)x(0,1,1)[12] fitted to the United
# series (bag_ts_United).
# This is the last reassignment of fit_AR1_United, residuals_United and
# pred_units_United before the Ljung-Box test, accuracy metrics and forecast
# below, so those cells operate on this fit. The variable keeps its
# "AR1" name even though the model is AR(2).
fit_AR1_United = ARIMA(bag_ts_United, order=(2,0,0), seasonal_order=(0, 1, 1, 12)).fit()
# Print the coefficient table and fit statistics (log likelihood, AIC, BIC, ...)
print(fit_AR1_United.summary())
# Plot the model residuals over time
residuals_United = fit_AR1_United.resid
plt.plot(residuals_United)
plt.title('Residuals')
plt.xticks(rotation=45)
plt.show()
# Build the prediction results for the series (no start/end supplied)
pred_units_United = fit_AR1_United.get_prediction()
# Prints only the results object's repr, not the predicted values
print(pred_units_United)
SARIMAX Results
==========================================================================================
Dep. Variable: y No. Observations: 84
Model: ARIMA(2, 0, 0)x(0, 1, [1], 12) Log Likelihood -528.251
Date: Mon, 23 Oct 2023 AIC 1064.501
Time: 02:46:03 BIC 1073.608
Sample: 01-01-2004 HQIC 1068.127
- 12-01-2010
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.2038 0.091 2.246 0.025 0.026 0.382
ar.L2 0.4335 0.086 5.030 0.000 0.265 0.602
ma.S.L12 -0.6021 0.110 -5.452 0.000 -0.819 -0.386
sigma2 1.287e+05 1.78e+04 7.243 0.000 9.39e+04 1.63e+05
===================================================================================
Ljung-Box (L1) (Q): 0.05 Jarque-Bera (JB): 14.42
Prob(Q): 0.82 Prob(JB): 0.00
Heteroskedasticity (H): 0.74 Skew: 0.90
Prob(H) (two-sided): 0.47 Kurtosis: 4.25
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
<statsmodels.tsa.statespace.mlemodel.PredictionResultsWrapper object at 0x7ee5a164a050>
# Ljung-Box test for autocorrelation remaining in the model residuals at lag 10.
# The residuals come from a fitted ARMA-type model, so the chi-square reference
# distribution loses one degree of freedom per estimated AR/MA coefficient
# (seasonal terms included). Count those coefficients from the fitted model's
# parameter names ('ar.*' / 'ma.*') and pass the count as model_df.
n_arma_params_United = sum(
    name.startswith(('ar.', 'ma.')) for name in fit_AR1_United.param_names
)
sm.stats.acorr_ljungbox(
    residuals_United, lags=[10], model_df=n_arma_params_United, return_df=True
)
| lb_stat | lb_pvalue | |
|---|---|---|
| 10 | 14.645233 | 0.145541 |
# Gather the prediction results into one table: the point prediction plus the
# lower and upper interval bounds returned by conf_int().
conf_int_United = pred_units_United.conf_int()
predicted_mean_United = pred_units_United.predicted_mean
prediction_df_United = pd.DataFrame(
    {
        'predicted_mean': predicted_mean_United,
        'lower_bound': conf_int_United.iloc[:, 0],
        'upper_bound': conf_int_United.iloc[:, 1],
    }
)
# Discard the first row of the table.
# NOTE(review): the recorded output still shows small predictions with very
# wide bounds in the rows that remain; with seasonal differencing at period 12
# these are presumably initialization burn-in, so dropping a single row may be
# too few - confirm.
prediction_df_United = prediction_df_United.iloc[1:]
prediction_df_United.head()
| predicted_mean | lower_bound | upper_bound | |
|---|---|---|---|
| Date | |||
| 2004-02-01 | 67.689413 | -2111.295560 | 2246.674386 |
| 2004-03-01 | 111.617093 | -2058.288275 | 2281.522460 |
| 2004-04-01 | 80.478285 | -2088.197694 | 2249.154264 |
| 2004-05-01 | 80.234820 | -2087.262471 | 2247.732112 |
| 2004-06-01 | 82.668750 | -2084.667914 | 2250.005414 |
# Attach the observed series as a 'Baggage' column next to the predictions.
# pd.concat aligns the two on their index; bag_ts_United presumably still
# holds the first date that was removed from the predictions, so that row
# would come back with missing predictions and is dropped again here.
# NOTE(review): the column is named 'Baggage', while the forecast plot further
# down labels this series as cancelled flights - confirm which measure
# bag_ts_United holds.
prediction_df_United = pd.concat(
    [prediction_df_United, bag_ts_United.to_frame(name='Baggage')],
    axis=1,
).iloc[1:]
prediction_df_United.head()
| predicted_mean | lower_bound | upper_bound | Baggage | |
|---|---|---|---|---|
| Date | ||||
| 2004-02-01 | 67.689413 | -2111.295560 | 2246.674386 | 312 |
| 2004-03-01 | 111.617093 | -2058.288275 | 2281.522460 | 321 |
| 2004-04-01 | 80.478285 | -2088.197694 | 2249.154264 | 162 |
| 2004-05-01 | 80.234820 | -2087.262471 | 2247.732112 | 652 |
| 2004-06-01 | 82.668750 | -2084.667914 | 2250.005414 | 549 |
# Accuracy of the predictions in prediction_df_United against the actuals.
# Signed error: actual minus predicted
errors_United = prediction_df_United['Baggage'].sub(prediction_df_United['predicted_mean'])
# Absolute value of the error
errors_abs_United = errors_United.abs()
# MAPE: mean of |error| / |actual|
mape_United = errors_abs_United.div(prediction_df_United['Baggage'].abs()).mean()
# RMSE: square root of the mean squared error
rmse_United = errors_United.pow(2).mean() ** 0.5
# Report both metrics, one per line
print(f'MAPE = {mape_United:.3%}', f'RMSE = {rmse_United:,.3f}', sep='\n')
MAPE = 48.698% RMSE = 380.126
# Forecast the six months following the end of the sample from the fitted model.
forecast_United = fit_AR1_United.get_forecast(steps=6)
mean_forecast_United = forecast_United.predicted_mean
confidence_intervals_United = forecast_United.conf_int()
# Tabulate the point forecast with its interval bounds (conf_int() at its
# default level). The value column is labelled 'Forecast': this is airline
# data, so the earlier 'Forecasted_CPI' label did not describe it.
forecast_df_United = pd.DataFrame({
    'Forecast': mean_forecast_United,
    'Lower_Bound': confidence_intervals_United.iloc[:, 0],
    'Upper_Bound': confidence_intervals_United.iloc[:, 1],
})
forecast_df_United
| Forecasted_CPI | Lower_Bound | Upper_Bound | |
|---|---|---|---|
| 2011-01-01 | 547.621831 | -155.670057 | 1250.913719 |
| 2011-02-01 | 852.413769 | 134.680399 | 1570.147140 |
| 2011-03-01 | 509.236264 | -282.371505 | 1300.844032 |
| 2011-04-01 | 303.110520 | -499.121741 | 1105.342781 |
| 2011-05-01 | 325.908946 | -494.398479 | 1146.216371 |
| 2011-06-01 | 562.942855 | -262.429269 | 1388.314978 |
# Draw the observed series, the six-month point forecast and the shaded
# forecast interval on a single set of axes.
# NOTE(review): the labels describe cancelled flights while the series variable
# is named bag_ts_United - confirm the labels match the plotted measure.
ax = plt.figure(figsize=(14, 7)).add_subplot()
bag_ts_United.plot(ax=ax, label='Observed', legend=True)
mean_forecast_United.plot(ax=ax, label='6-Months Forecast', legend=True)
# Shade the band between the lower and upper interval bounds
ax.fill_between(
    confidence_intervals_United.index,
    confidence_intervals_United.iloc[:, 0],
    confidence_intervals_United.iloc[:, 1],
    color='pink',
    alpha=0.3,
)
ax.set_title("United Airlines Cancelled Flights Forecast")
ax.set_xlabel("Date")
ax.set_ylabel("# Cancelled Flights")
plt.xticks(rotation=45)
plt.tight_layout()
ax.legend()
plt.show()